{
 "cells": [
  {
   "cell_type": "code",
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "# Silence all warnings to keep cell output uncluttered.\n",
    "# NOTE(review): a blanket filter also hides pandas deprecation/FutureWarnings - consider narrowing it.\n",
    "import warnings\n",
    "warnings.filterwarnings(\"ignore\")"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-24T11:24:42.023968400Z",
     "start_time": "2024-09-24T11:24:41.986944300Z"
    }
   },
   "id": "955def2dff9332a5",
   "execution_count": 1
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "initial_id",
   "metadata": {
    "collapsed": true,
    "ExecuteTime": {
     "end_time": "2024-09-24T11:25:56.900278900Z",
     "start_time": "2024-09-24T11:25:56.369926600Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": "   user_id  age_range  gender\n0   376517        6.0     1.0\n1   234512        5.0     0.0\n2   344532        5.0     0.0\n3   186135        5.0     0.0\n4    30230        5.0     0.0",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>user_id</th>\n      <th>age_range</th>\n      <th>gender</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>376517</td>\n      <td>6.0</td>\n      <td>1.0</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>234512</td>\n      <td>5.0</td>\n      <td>0.0</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>344532</td>\n      <td>5.0</td>\n      <td>0.0</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>186135</td>\n      <td>5.0</td>\n      <td>0.0</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>30230</td>\n      <td>5.0</td>\n      <td>0.0</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the user profile table; the preview shows columns user_id, age_range and gender.\n",
    "user_info = pd.read_csv('user_info_format1.csv')\n",
    "user_info.head()"
   ]
  },
  {
   "cell_type": "code",
   "outputs": [],
   "source": [
    "# When re-loading the data, explicit dtypes can be passed to reduce memory usage.\n",
    "# NOTE(review): the dtype map below is disabled, so the log is read with pandas' default dtypes;\n",
    "# confirm each column's value range fits the narrower type before enabling it.\n",
    "# d_types = {'user_id': 'int32', 'item_id': 'int32', 'cat_id': 'int16', 'seller_id': 'int16', 'brand_id': 'float32', 'time_stamp': 'int16', 'action_type': 'int8'}\n",
    "# user_log = pd.read_csv(\"user_log_format1.csv\",dtype = d_types)\n",
    "user_log = pd.read_csv(\"user_log_format1.csv\")\n",
    "user_log.head()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-24T11:25:43.793544900Z",
     "start_time": "2024-09-24T11:24:42.023968400Z"
    }
   },
   "id": "2c525cb895b58ac5",
   "execution_count": 2
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "data": {
      "text/plain": "user_id         0\nage_range    2217\ngender       6436\ndtype: int64"
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Count missing values in each column of the user table.\n",
    "user_info.isna().sum()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-23T12:11:06.522872Z",
     "start_time": "2024-09-23T12:11:06.401794100Z"
    }
   },
   "id": "2e9d2aa998b4b3b9",
   "execution_count": 33
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "data": {
      "text/plain": "user_id            0\nitem_id            0\ncat_id             0\nseller_id          0\nbrand_id       91015\ntime_stamp         0\naction_type        0\ndtype: int64"
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Count missing values in each column of the behaviour log.\n",
    "user_log.isna().sum()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-23T12:11:13.013087600Z",
     "start_time": "2024-09-23T12:11:11.974909600Z"
    }
   },
   "id": "b52a126a0523a131",
   "execution_count": 34
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 424170 entries, 0 to 424169\n",
      "Data columns (total 3 columns):\n",
      " #   Column     Non-Null Count   Dtype  \n",
      "---  ------     --------------   -----  \n",
      " 0   user_id    424170 non-null  int64  \n",
      " 1   age_range  421953 non-null  float64\n",
      " 2   gender     417734 non-null  float64\n",
      "dtypes: float64(2), int64(1)\n",
      "memory usage: 9.7 MB\n"
     ]
    }
   ],
   "source": [
    "# Print column dtypes, non-null counts and memory usage for the user table.\n",
    "user_info.info()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-23T12:11:44.044609500Z",
     "start_time": "2024-09-23T12:11:43.689120800Z"
    }
   },
   "id": "7dffd89587d5fc0f",
   "execution_count": 35
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 54925330 entries, 0 to 54925329\n",
      "Data columns (total 7 columns):\n",
      " #   Column       Dtype  \n",
      "---  ------       -----  \n",
      " 0   user_id      int64  \n",
      " 1   item_id      int64  \n",
      " 2   cat_id       int64  \n",
      " 3   seller_id    int64  \n",
      " 4   brand_id     float64\n",
      " 5   time_stamp   int64  \n",
      " 6   action_type  int64  \n",
      "dtypes: float64(1), int64(6)\n",
      "memory usage: 2.9 GB\n"
     ]
    }
   ],
   "source": [
    "# Print column dtypes and memory usage for the behaviour log.\n",
    "user_log.info()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-23T12:11:47.511681600Z",
     "start_time": "2024-09-23T12:11:47.430629Z"
    }
   },
   "id": "a57b928566bc0c2e",
   "execution_count": 36
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "data": {
      "text/plain": "(54925330, 7)"
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# (rows, columns) of the behaviour log.\n",
    "user_log.shape"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-23T12:11:51.084978700Z",
     "start_time": "2024-09-23T12:11:51.008927500Z"
    }
   },
   "id": "78ef6fdac1a8b2ed",
   "execution_count": 37
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "data": {
      "text/plain": "user_id      0\nage_range    0\ngender       0\ndtype: int64"
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Replace missing demographics with explicit sentinel codes instead of dropping rows.\n",
    "# The filled column is assigned back to the frame: calling an inplace method on a column\n",
    "# selection (df[col].replace(..., inplace=True)) is chained assignment and does not update\n",
    "# the parent frame under pandas Copy-on-Write.\n",
    "# NOTE(review): if the data dictionary already defines an 'unknown' age_range code, consider reusing it instead of -1.\n",
    "user_info['age_range'] = user_info['age_range'].fillna(-1)  # -1 marks a missing age_range\n",
    "user_info['gender'] = user_info['gender'].fillna(2)  # gender: 2 and NULL both mean unknown\n",
    "# Sanity check: every column should now report zero missing values.\n",
    "user_info.isnull().sum()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-24T11:26:35.859190900Z",
     "start_time": "2024-09-24T11:26:35.559991900Z"
    }
   },
   "id": "fb4b8f869391b193",
   "execution_count": 4
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "data": {
      "text/plain": "user_id        0\nitem_id        0\ncat_id         0\nseller_id      0\nbrand_id       0\ntime_stamp     0\naction_type    0\ndtype: int64"
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Fill missing brand ids with the sentinel -1 and assign the result back to the frame.\n",
    "# An inplace method on a column selection (df[col].replace(..., inplace=True)) is chained\n",
    "# assignment and does not update the parent frame under pandas Copy-on-Write.\n",
    "user_log['brand_id'] = user_log['brand_id'].fillna(-1)\n",
    "# Sanity check: every column should now report zero missing values.\n",
    "user_log.isnull().sum()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-24T11:26:43.117006100Z",
     "start_time": "2024-09-24T11:26:39.974921700Z"
    }
   },
   "id": "26c9ebd5dccdc3c3",
   "execution_count": 5
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0\n",
      "13750198\n"
     ]
    }
   ],
   "source": [
    "# Report the number of fully duplicated rows: user table first, behaviour log second.\n",
    "print(user_info.duplicated().sum(), user_log.duplicated().sum(), sep='\\n')"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-23T11:36:03.389535800Z",
     "start_time": "2024-09-23T11:34:56.585637100Z"
    }
   },
   "id": "b05a9703db44e02a",
   "execution_count": 11
  },
  {
   "cell_type": "code",
   "outputs": [],
   "source": [
    "# NOTE(review): de-duplication of the behaviour log is disabled here; identical rows may be\n",
    "# genuine repeated actions rather than data errors - confirm before re-enabling.\n",
    "# user_log.drop_duplicates(inplace=True)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-20T06:16:22.088839800Z",
     "start_time": "2024-09-20T06:15:26.813650400Z"
    }
   },
   "id": "ea2c96927e2f314b",
   "execution_count": 13
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "data": {
      "text/plain": "   user_id  merchant_id  label\n0    34176         3906      0\n1    34176          121      0\n2    34176         4356      1\n3    34176         2217      0\n4   230784         4818      0",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>user_id</th>\n      <th>merchant_id</th>\n      <th>label</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>34176</td>\n      <td>3906</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>34176</td>\n      <td>121</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>34176</td>\n      <td>4356</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>34176</td>\n      <td>2217</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>230784</td>\n      <td>4818</td>\n      <td>0</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the labelled training pairs; the preview shows columns user_id, merchant_id and label.\n",
    "train = pd.read_csv('train_format1.csv')\n",
    "train.head()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-24T11:26:46.403186500Z",
     "start_time": "2024-09-24T11:26:45.996917Z"
    }
   },
   "id": "75111d5dd5f7e736",
   "execution_count": 6
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "data": {
      "text/plain": "<Axes: xlabel='age_range'>"
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": "<Figure size 640x480 with 1 Axes>",
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjwAAAG0CAYAAAA2BP2yAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8fJSN1AAAACXBIWXMAAA9hAAAPYQGoP6dpAAAyu0lEQVR4nO3de1xVdb7/8ffegLJBBYyELjZOAjXdEYQwLdMhT2NeRrE6w3S0U1ZgF/s9sJuUjgZi05TDaaQZy8NptJygrExrtKvk8YLlWHnSYTsnZSJFUJDNRYG9fn/4Y/+GtAlww2Z/fT0fDx8P9/6utb6fz9rL7Zu9FnvZLMuyBAAAYDC7rwsAAADobgQeAABgPAIPAAAwHoEHAAAYj8ADAACMR+ABAADGI/AAAADjEXgAAIDxCDwAAMB4BB4AAGC8QF8X0JtUV9epJ260YbNJZ53Vv8fm60n05p/ozT/Rm/8yub+e7K1tro4g8PwDy1KPHng9PV9Pojf/RG/+id78l8n99bbeOKUFAACMR+ABAADGI/AAAADjEXgAAIDxCDwAAMB4BB4AAGA8Ag8AADAegQcAABiPwAMAAIxH4AEAAMYj8AAAAOMReAAAgPEIPAAAwHjcLR0wjN1uk91u69K6AQGd/xnI7bbkdveiWyIDwCkQeACD2O02hYWHKLALwUWSIiJCO71OS6tbtTUNhB4AvRqBBzCI3W5TYIBdD6zaIWelq9vnixnUT7+9NV52u43AA6BXI/AABnJWurSr4qivywCAXoOLlgEAgPEIPAAAwHgEHgAAYDwCDwAAMB6BBwAAGI/AAwAAjEfgAQAAxiPwAAAA4xF4AACA8Qg8AADAeAQeAABgPAIPAAAwHoEHAAAYj8ADAACMR+ABAADGI/AAAADjEXgAAIDxCDwAAMB4BB4AAGA8Ag8AADAegQcAABiPwAMAAIxH4AEAAMYj8AAAAOMReAAAgPEIPAAAwHgEHgAAYDwCDwAAMB6BBwAAGI/AAwAAjNflwHP48GGlpqZq69atnud27typadOmKT4+XmPGjFFRUVG7dVavXq3U1FRdddVVmjJlinbs2OEZa21t1eLFizVixAjFx8crIyNDlZWVnvHq6mplZmYqMTFRycnJysnJUUtLS4fnBgAAZ64uBZ5PP/1Ut9xyi/bv3+95rra2VnfddZcmT56s0tJS5eTkaNGiRfr8888lSVu3btXChQuVl5en0tJSTZw4URkZGWpsbJQkFRQUaNOmTXrttddUUlKi4OBgZWdne7Y/e/ZshYSEqKSkRMXFxdq8ebMKCws7NDcAADizdTrwrF69WllZWXrwwQfbPb9+/XqFh4crPT1dgYGBSklJ0YQJE7Ry5UpJUlFRkcaPH6+EhAQFBQVpxowZioiI0Lp16zzjM2fO1DnnnKN+/fpp7ty52rhxo8rLy7Vv3z5t27ZNc+bMkcPh0ODBg5WZmenZ9g/NDQAAzmyBnV1h5MiRmjBhggIDA9uFnrKyMsXFxbVbNiYmRsXFxZIkp9OpqVOnnjS+e/du1dXV6cCBA+3Wj4yMVFhYmPbs2SNJCg8PV1RUlGd86NChqqio0NGjR39w7o6y2Tq1eJe1zdNT8/Ukejtz9db9YvLrRm/+y+T+erK3zszR6cBz9tlnn/L5+vp6ORyOds8FBweroaHhB8fr6+slSSEhISeNt419d922x23r/7O5O+qss/p3avnT1dPz9SR6O7NERIT6uoQfZPLrRm/+y+T+eltvnQ4838fhcKiurq7dc01NTQoNDfWMNzU1nTQeERHhCStt1/N8d33Lsk4aa3scGhr6g3N3VHV1nSyrU6t0ic124kDoqfl6Er35VkCA3Sfh48iRerW2unt83o7wh9etq+jNf5ncX0/21jZXR3gt8MTFxWnTpk3tnnM6nYqNjZUkxcbGqqys7KTxa6+9VmFhYYqKipLT6fScmjp06JBqamoUFxcnt9utmpoaVVVV
KTIyUpK0d+9eRUdHq3///j84d0dZlnr0wOvp+XoSvZ15evs+Mfl1ozf/ZXJ/va03r30PT2pqqqqqqlRYWKjm5mZt2bJFa9as8Vy3k5aWpjVr1mjLli1qbm5WYWGhqqurlZqaKkmaMmWKCgoKVF5eLpfLpdzcXCUlJemCCy7QkCFDlJCQoNzcXLlcLpWXl2vp0qVKS0vr0NwAAODM5rVPeCIiIrR8+XLl5OQoPz9fAwcOVHZ2tq6++mpJUkpKiubNm6f58+fr4MGDiomJ0bJlyxQeHi5JmjVrllpaWpSenq76+nolJydryZIlnu3n5+drwYIFGjt2rOx2uyZPnqzMzMwOzQ0AAM5sNsvqTR84+VZVVc9dwxMZ2b/H5utJ9OZbgYEnruEZn1+iXRVHu32+S88doLX3j9KRI/Vqaem91/D09tetq+jNf5ncX0/21jZXR3BrCQAAYDwCDwAAMB6BBwAAGI/AAwAAjEfgAQAAxiPwAAAA4xF4AACA8Qg8AADAeAQeAABgPAIPAAAwHoEHAAAYj8ADAACMR+ABAADGI/AAAADjEXgAAIDxAn1dgL+z222y221dWjcgoPN50+225HZbXZoPAIAzFYHnNNjtNoWFhyiwC8FFkiIiQju9TkurW7U1DYQeAAA6gcBzGux2mwID7Hpg1Q45K13dPl/MoH767a3xstttBB4AADqBwOMFzkqXdlUc9XUZAADge3DRMgAAMB6BBwAAGI/AAwAAjEfgAQAAxiPwAAAA4xF4AACA8Qg8AADAeAQeAABgPAIPAAAwHoEHAAAYj8ADAACMR+ABAADGI/AAAADjEXgAAIDxCDwAAMB4BB4AAGA8Ag8AADAegQcAABiPwAMAAIxH4AEAAMYj8AAAAOMReAAAgPEIPAAAwHgEHgAAYDwCDwAAMB6BBwAAGI/AAwAAjEfgAQAAxiPwAAAA4xF4AACA8Qg8AADAeAQeAABgPAIPAAAwHoEHAAAYz6uBZ9euXUpPT1diYqJGjhypJ598UsePH5ck7dy5U9OmTVN8fLzGjBmjoqKiduuuXr1aqampuuqqqzRlyhTt2LHDM9ba2qrFixdrxIgRio+PV0ZGhiorKz3j1dXVyszMVGJiopKTk5WTk6OWlhZvtgYAAPyY1wKP2+3W3XffrXHjxmnbtm0qLi7WJ598omXLlqm2tlZ33XWXJk+erNLSUuXk5GjRokX6/PPPJUlbt27VwoULlZeXp9LSUk2cOFEZGRlqbGyUJBUUFGjTpk167bXXVFJSouDgYGVnZ3vmnj17tkJCQlRSUqLi4mJt3rxZhYWF3moNAAD4Oa8FntraWh06dEhut1uWZZ3YuN0uh8Oh9evXKzw8XOnp6QoMDFRKSoomTJiglStXSpKKioo0fvx4JSQkKCgoSDNmzFBERITWrVvnGZ85c6bOOecc9evXT3PnztXGjRtVXl6uffv2adu2bZozZ44cDocGDx6szMxMz7YBAAACvbWhiIgIzZgxQ4sXL9ZTTz2l1tZWjR07VjNmzFBeXp7i4uLaLR8TE6Pi4mJJktPp1NSpU08a3717t+rq6nTgwIF260dGRiosLEx79uyRJIWHhysqKsozPnToUFVUVOjo0aMaMGBAh3uw2Trdts/01lrb6uqt9Z0Ok3vzhp7YL3a7TbZOTtS2eGCgXf/vZ7EOsyxLbncnV+pBJh+TJvcmmd1fT/bWmTm8FnjcbreCg4P1+OOPKy0tTfv27dO9996r/Px81dfXy+FwtFs+ODhYDQ0NkvRPx+vr6yVJISEhJ423jX133bbHDQ0NnQo8Z53Vv8PL+lJERKivS/hB/rIvu8Lk3rqqp47JVrelAHvX3kXDwztf4+nM15NMPiZN7k0yu7/e1pvXAs+GDRv05z//We+++64kKTY2VrNmzVJOTo4mTJigurq6dss3NTUpNPTEG5DD4VBTU9NJ4xEREZ7w0nY9z3fXtyzrpLG2x23b76jq6rpO/QQYEGD3Sfg4cqRe
ra3uHp+3I2y2Ewd5Z/elP/CH3kw+Jtt6e2DVDjkrXd06lyTFDOqn394az783HzG5N8ns/nqyt7a5OsJrgefbb7/1/EaWZ+OBgQoKClJcXJw2bdrUbszpdCo2NlbSiXBUVlZ20vi1116rsLAwRUVFyel0ek5rHTp0SDU1NYqLi5Pb7VZNTY2qqqoUGRkpSdq7d6+io6PVv3/n0qVlyW8OvN5epz/ty84yubfT0VP7xFnp0q6Koz0z2f/T219vk49Jk3uTzO6vt/XmtYuWR44cqUOHDun5559Xa2urysvLVVBQoAkTJig1NVVVVVUqLCxUc3OztmzZojVr1niu20lLS9OaNWu0ZcsWNTc3q7CwUNXV1UpNTZUkTZkyRQUFBSovL5fL5VJubq6SkpJ0wQUXaMiQIUpISFBubq5cLpfKy8u1dOlSpaWleas1AADg57z2CU9MTIx+//vfa8mSJXrhhRfUv39/TZw4UbNmzVKfPn20fPly5eTkKD8/XwMHDlR2drauvvpqSVJKSormzZun+fPn6+DBg4qJidGyZcsUHh4uSZo1a5ZaWlqUnp6u+vp6JScna8mSJZ658/PztWDBAo0dO1Z2u12TJ09WZmamt1oDAAB+zmuBR5JGjBihESNGnHLs8ssv16pVq7533UmTJmnSpEmnHAsKClJWVpaysrJOOR4ZGan8/PzOFwwAAM4I3FoCAAAYj8ADAACMR+ABAADGI/AAAADjEXgAAIDxCDwAAMB4BB4AAGA8Ag8AADAegQcAABiPwAMAAIxH4AEAAMYj8AAAAOMReAAAgPEIPAAAwHgEHgAAYDwCDwAAMB6BBwAAGI/AAwAAjEfgAQAAxiPwAAAA4xF4AACA8Qg8AADAeAQeAABgPAIPAAAwHoEHAAAYj8ADAACMR+ABAADGI/AAAADjEXgAAIDxCDwAAMB4BB4AAGA8Ag8AADAegQcAABiPwAMAAIxH4AEAAMYj8AAAAOMReAAAgPEIPAAAwHgEHgAAYDwCDwAAMB6BBwAAGI/AAwAAjEfgAQAAxiPwAAAA4xF4AACA8Qg8AADAeAQeAABgPAIPAAAwHoEHAAAYj8ADAACMR+ABAADGI/AAAADjEXgAAIDxvBp4ampq9NBDDyk5OVnDhw9XZmamKisrJUk7d+7UtGnTFB8frzFjxqioqKjduqtXr1ZqaqquuuoqTZkyRTt27PCMtba2avHixRoxYoTi4+OVkZHh2a4kVVdXKzMzU4mJiUpOTlZOTo5aWlq82RoAAPBjXg089913nxoaGrRhwwZ9+OGHCggI0OOPP67a2lrdddddmjx5skpLS5WTk6NFixbp888/lyRt3bpVCxcuVF5enkpLSzVx4kRlZGSosbFRklRQUKBNmzbptddeU0lJiYKDg5Wdne2Zd/bs2QoJCVFJSYmKi4u1efNmFRYWerM1AADgx7wWeL788kvt3LlTeXl5GjBggPr166eFCxcqKytL69evV3h4uNLT0xUYGKiUlBRNmDBBK1eulCQVFRVp/PjxSkhIUFBQkGbMmKGIiAitW7fOMz5z5kydc8456tevn+bOnauNGzeqvLxc+/bt07Zt2zRnzhw5HA4NHjxYmZmZnm0DAAAEemtDn3/+uWJiYvTqq6/qlVdeUWNjo0aNGqWHH35YZWVliouLa7d8TEyMiouLJUlOp1NTp049aXz37t2qq6vTgQMH2q0fGRmpsLAw7dmzR5IUHh6uqKgoz/jQoUNVUVGho0ePasCAAR3uwWbrdNs+01trbaurt9Z3OkzuzRtM3i+9tTeTj0mTe5PM7q8ne+vMHF4LPLW1tdqzZ48uu+wyrV69Wk1NTXrooYf08MMPKzIyUg6Ho93ywcHBamhokCTV19d/73h9fb0kKSQk5KTxtrHvrtv2uKGhoVOB56yz+nd4WV+KiAj1dQk/yF/2ZVeY3FtX+cMx2VX+0JvJx6TJvUlm99fbevNa4OnTp48kae7cuerbt6/69eun2bNn
6+abb9aUKVPU1NTUbvmmpiaFhp54I3E4HKccj4iI8ISXtut5vru+ZVknjbU9btt+R1VX18myOr58QIDdJ2+GR47Uq7XV3ePzdoTNduIg7+y+9Af+0JvJx6TJvXWVPxyTXWVyb5LZ/fVkb21zdYTXAk9MTIzcbream5vVt29fSZLbfeJN4ic/+Ylefvnldss7nU7FxsZKkmJjY1VWVnbS+LXXXquwsDBFRUXJ6XR6TmsdOnRINTU1iouLk9vtVk1NjaqqqhQZGSlJ2rt3r6Kjo9W/f+fSpWXJbw683l6nP+3LzjK5t9Nh8j7p7b2ZfEya3Jtkdn+9rTevXbQ8YsQIDR48WI899pjq6+t1+PBhPfvss/rpT3+qm266SVVVVSosLFRzc7O2bNmiNWvWeK7bSUtL05o1a7RlyxY1NzersLBQ1dXVSk1NlSRNmTJFBQUFKi8vl8vlUm5urpKSknTBBRdoyJAhSkhIUG5urlwul8rLy7V06VKlpaV5qzUAAODnvBZ4goKC9Mc//lEBAQEaN26cxo0bp+joaOXm5ioiIkLLly/Xu+++q+TkZGVnZys7O1tXX321JCklJUXz5s3T/PnzlZSUpLVr12rZsmUKDw+XJM2aNUvXXXed0tPTdd111+nYsWNasmSJZ+78/Hy1tLRo7NixuvnmmzVq1ChlZmZ6qzUAAODnvHZKS5KioqL07LPPnnLs8ssv16pVq7533UmTJmnSpEmnHAsKClJWVpaysrJOOR4ZGan8/PzOFwwAAM4I3FoCAAAYj8ADAACMR+ABAADGI/AAAADjEXgAAIDxCDwAAMB4BB4AAGA8Ag8AADAegQcAABjPq9+0DPgLu90mu93WpXUDAjr/c4Lbbcnt7kV30QOAMwyBB2ccu92msPAQBXYhuEhSRERop9dpaXWrtqaB0AMAPkLgwRnHbrcpMMCuB1btkLPS1e3zxQzqp9/eGi+73UbgAQAfIfDgjOWsdGlXxVFflwEA6AFctAwAAIxH4AEAAMYj8AAAAOMReAAAgPEIPAAAwHgEHgAAYDwCDwAAMB6BBwAAGI/AAwAAjEfgAQAAxiPwAAAA4xF4AACA8Qg8AADAeAQeAABgPAIPAAAwHoEHAAAYj8ADAACMR+ABAADGI/AAAADjEXgAAIDxCDwAAMB4BB4AAGA8Ag8AADAegQcAABiPwAMAAIxH4AEAAMYj8AAAAOMReAAAgPEIPAAAwHgEHgAAYDwCDwAAMB6BBwAAGI/AAwAAjEfgAQAAxiPwAAAA4xF4AACA8Qg8AADAeAQeAABgPAIPAAAwHoEHAAAYr1sCT2trq2677TY98sgjnud27typadOmKT4+XmPGjFFRUVG7dVavXq3U1FRdddVVmjJlinbs2NFue4sXL9aIESMUHx+vjIwMVVZWesarq6uVmZmpxMREJScnKycnRy0tLd3RGgAA8EPdEniee+45bd++3fO4trZWd911lyZPnqzS0lLl5ORo0aJF+vzzzyVJW7du1cKFC5WXl6fS0lJNnDhRGRkZamxslCQVFBRo06ZNeu2111RSUqLg4GBlZ2d7tj979myFhISopKRExcXF2rx5swoLC7ujNQDoNna7TYGB9g7/CQg48RYeENDxddr+2O02H3cL9KxAb29w8+bNWr9+vW644QbPc+vXr1d4eLjS09MlSSkpKZowYYJWrlypK664QkVFRRo/frwSEhIkSTNmzNCf/vQnrVu3TlOnTlVRUZGysrJ0zjnnSJLmzp2rkSNHqry8XG63W9u2bdPGjRvlcDg0ePBgZWZm6te//rXuvPNOb7cHAN3CbrcpLDxEgQGd/zk0IiK00+u0tLpVW9Mgt9vq9LqAP/Jq4KmurtbcuXO1dOnSdp+wlJWVKS4urt2yMTExKi4uliQ5nU5NnTr1pPHdu3errq5OBw4caLd+ZGSkwsLCtGfPHklSeHi4oqKiPONDhw5VRUWFjh49qgEDBnS4fpsf/cDTW2ttq6u31udr
Ju8Xejs9drtNgQF2PbBqh5yVrm6dK2ZQP/321njZ7TZZVu8MPKa/l5jcX0/21pk5vBZ43G635syZo9tvv10XX3xxu7H6+no5HI52zwUHB6uhoeEHx+vr6yVJISEhJ423jX133bbHDQ0NnQo8Z53Vv8PL+lJXfprraf6yL3uSP7xuXUVv3uOsdGlXxdEemcsfXjfT30tM7q+39ea1wPP73/9effr00W233XbSmMPhUF1dXbvnmpqaFBoa6hlvamo6aTwiIsITXtqu5/nu+pZlnTTW9rht+x1VXV2nzvywExBg98kbxpEj9Wptdff4vB1hs504yDu7L3uSya8bvXlfT/1780V/vJf4jsn99WRvbXN1hNcCz5tvvqnKykolJiZKkifAvPfee3rooYe0adOmdss7nU7FxsZKkmJjY1VWVnbS+LXXXquwsDBFRUXJ6XR6TmsdOnRINTU1iouLk9vtVk1NjaqqqhQZGSlJ2rt3r6Kjo9W/f+fSpWXJbw683l6nP+3LnmTyPqE3/9TbezP9vcTk/npbb177La13331Xn332mbZv367t27frpptu0k033aTt27crNTVVVVVVKiwsVHNzs7Zs2aI1a9Z4rttJS0vTmjVrtGXLFjU3N6uwsFDV1dVKTU2VJE2ZMkUFBQUqLy+Xy+VSbm6ukpKSdMEFF2jIkCFKSEhQbm6uXC6XysvLtXTpUqWlpXmrNQAA4Oe8/ltapxIREaHly5crJydH+fn5GjhwoLKzs3X11VdLOvFbW/PmzdP8+fN18OBBxcTEaNmyZQoPD5ckzZo1Sy0tLUpPT1d9fb2Sk5O1ZMkSz/bz8/O1YMECjR07Vna7XZMnT1ZmZmZPtAYAAPxAtwWevLy8do8vv/xyrVq16nuXnzRpkiZNmnTKsaCgIGVlZSkrK+uU45GRkcrPz+96sQAAwGjcWgIAABiPwAMAAIxH4AEAAMYj8AAAAOMReAAAgPEIPAAAwHgEHgAAYDwCDwAAMF6PfNMyAODMZrfbZLfbOr1eQEDnfy53uy253b3oJk7oFQg8AIBuZbfbFBYeosAuhJeu3EG+pdWt2poGQg/aIfAAALqV3W5TYIBdD6zaIWelq1vnihnUT7+9NV52u43Ag3YIPACAHuGsdGlXxVFfl4EzFBctAwAA4xF4AACA8Qg8AADAeAQeAABgPAIPAAAwHoEHAAAYj8ADAACMR+ABAADGI/AAAADjEXgAAIDxCDwAAMB4BB4AAGA8Ag8AADAegQcAABiPwAMAAIxH4AEAAMYj8AAAAOMReAAAgPEIPAAAwHgEHgAAYDwCDwAAMB6BBwAAGI/AAwAAjEfgAQAAxiPwAAAA4xF4AACA8Qg8AADAeAQeAABgPAIPAAAwHoEHAAAYj8ADAACMR+ABAADGI/AAAADjEXgAAIDxCDwAAMB4BB4AAGA8Ag8AADAegQcAABiPwAMAAIxH4AEAAMYj8AAAAON5NfDs3r1bt99+u5KSknTNNdfooYce0uHDhyVJO3fu1LRp0xQfH68xY8aoqKio3bqrV69WamqqrrrqKk2ZMkU7duzwjLW2tmrx4sUaMWKE4uPjlZGRocrKSs94dXW1MjMzlZiYqOTkZOXk5KilpcWbrQEAAD/mtcDT1NSkO++8U/Hx8frkk0/09ttvq6amRo899phqa2t11113afLkySotLVVOTo4WLVqkzz//XJK0detWLVy4UHl5eSotLdXEiROVkZGhxsZGSVJBQYE2bdqk1157TSUlJQoODlZ2drZn7tmzZyskJEQlJSUqLi7W5s2bVVhY6K3WAACAn/Na4KmoqNDFF1+sWbNmqU+fPoqIiNAtt9yi0tJSrV+/XuHh4UpPT1dgYKBSUlI0YcIErVy5UpJUVFSk8ePHKyEhQUFBQZoxY4YiIiK0bt06z/jMmTN1zjnnqF+/fpo7d642btyo8vJy7du3T9u2bdOcOXPkcDg0ePBgZWZmerYNAAAQ6K0NXXjhhXrhhRfa
PffnP/9Zl156qcrKyhQXF9duLCYmRsXFxZIkp9OpqVOnnjS+e/du1dXV6cCBA+3Wj4yMVFhYmPbs2SNJCg8PV1RUlGd86NChqqio0NGjRzVgwIAO92CzdXhRn+uttbbV1Vvr8zWT9wu9+Sd68w2T3yt7srfOzOG1wPOPLMvSkiVL9OGHH2rFihV66aWX5HA42i0THByshoYGSVJ9ff33jtfX10uSQkJCThpvG/vuum2PGxoaOhV4zjqrf4eX9aWIiFBfl/CD/GVf9iR/eN26it78E735nsnvlb2tN68HHpfLpUcffVS7du3SihUrdNFFF8nhcKiurq7dck1NTQoNPXFAOhwONTU1nTQeERHhCS9t1/N8d33Lsk4aa3vctv2Oqq6uk2V1fPmAALtP/lEdOVKv1lZ3j8/bETbbiYO8s/uyJ5n8utGb9/XUvzdf9EdvvuMP75Vd1ZO9tc3VEV4NPPv379fMmTN17rnnqri4WAMHDpQkxcXFadOmTe2WdTqdio2NlSTFxsaqrKzspPFrr71WYWFhioqKktPp9JzWOnTokGpqahQXFye3262amhpVVVUpMjJSkrR3715FR0erf//OpUvLkt8ceL29Tn/alz3J5H1Cb/6J3nzL5PfK3tab1y5arq2t1fTp0zVs2DC9+OKLnrAjSampqaqqqlJhYaGam5u1ZcsWrVmzxnPdTlpamtasWaMtW7aoublZhYWFqq6uVmpqqiRpypQpKigoUHl5uVwul3Jzc5WUlKQLLrhAQ4YMUUJCgnJzc+VyuVReXq6lS5cqLS3NW60BAAA/57VPeF5//XVVVFTonXfe0bvvvttubMeOHVq+fLlycnKUn5+vgQMHKjs7W1dffbUkKSUlRfPmzdP8+fN18OBBxcTEaNmyZQoPD5ckzZo1Sy0tLUpPT1d9fb2Sk5O1ZMkSz/bz8/O1YMECjR07Vna7XZMnT1ZmZqa3WgMAAH7Oa4Hn9ttv1+233/6945dffrlWrVr1veOTJk3SpEmTTjkWFBSkrKwsZWVlnXI8MjJS+fn5nSsYAACcMbi1BAAAMB6BBwAAGI/AAwAAjEfgAQAAxiPwAAAA4xF4AACA8Qg8AADAeAQeAABgPAIPAAAwHoEHAAAYj8ADAACMR+ABAADGI/AAAADjEXgAAIDxCDwAAMB4BB4AAGA8Ag8AADAegQcAABiPwAMAAIxH4AEAAMYj8AAAAOMReAAAgPEIPAAAwHgEHgAAYDwCDwAAMB6BBwAAGI/AAwAAjEfgAQAAxiPwAAAA4xF4AACA8Qg8AADAeAQeAABgPAIPAAAwHoEHAAAYL9DXBQAA4M/sdpvsdluX1g0I6NznDm63Jbfb6tJcZzoCDwAAXWS32xQWHqLATgaXNhERoZ1avqXVrdqaBkJPFxB4AADoIrvdpsAAux5YtUPOSle3zhUzqJ9+e2u87HYbgacLCDwAAJwmZ6VLuyqO+roM/BNctAwAAIxH4AEAAMYj8AAAAOMReAAAgPEIPAAAwHgEHgAAYDwCDwAAMB6BBwAAGI/AAwAAjEfgAQAAxiPwAAAA4xF4AACA8Qg8AADAeAQeAABgvEBfFwAAAHonu90mu93WpXUDAjr3mYrbbcnttro0V0cYE3iqq6v1+OOPa9u2bQoICNDEiRP18MMPKzDQmBYBAOgxdrtNYeEhCuxkcGkTERHaqeVbWt2qrWnottBjTBqYPXu2oqKiVFJSoqqqKmVkZKiwsFB33nmnr0sDAMDv2O02BQbY9cCqHXJWurp1rphB/fTbW+Nlt9sIPP/Mvn37tG3bNm3cuFEOh0ODBw9WZmamfv3rXxN4AAA4Dc5Kl3ZVHPV1GafNiMBTVlam8PBwRUVFeZ4bOnSoKioqdPToUQ0YMKBD27HbJasLwfLScwfI0Seg8yt20oWR///jQXsPXG5us9lks3Xu3G3b4oGB9k7vS8uyZHXlBegiU183id68wRe9ST3TH715H72dnq721pn/omxWT/4P
003efPNNPfvss/roo488z+3fv1+pqan6+OOPFR0d7bviAACAzxnxa+khISFqbGxs91zb49DQzl00BQAAzGNE4ImNjVVNTY2qqqo8z+3du1fR0dHq37+/DysDAAC9gRGBZ8iQIUpISFBubq5cLpfKy8u1dOlSpaWl+bo0AADQCxhxDY8kVVVVacGCBdq6davsdrsmT56srKwsBQR0/8WNAACgdzMm8AAAAHwfI05pAQAA/DMEHgAAYDwCDwAAMB6BBwAAGI/AAwAAjEfgAQAAxiPwAAAA4xlxt/TerLKyUq+++qp2796thoYGhYaGKjY2VpMmTdKPfvQjX5eH72H662Zqf6b21cbk/ujNP/lTb3zC040++ugj3XDDDdq5c6cGDx6sK6+8Uueff76++OILTZo0SSUlJb4u8bRVVlbqueee07333qt///d/13333af8/Hzt27fP16V1memvm6n9mdpXG5P7ozf/5He9Weg2P/vZz6x169adcmzt2rXWTTfd1MMVedeHH35oXXnlldadd95p5eXlWUuWLLHy8vKsO++807ryyiutjRs3+rrELjH9dTO1P1P7amNyf/Tmn/ytNwJPN7rqqqus1tbWU461tLRYw4YN6+GKvMvfDvaOMv11M7U/U/tqY3J/9Oaf/K03Tml1o/PPP18fffTRKcc2bNigwYMH92xBXlZRUaFx48adcmzcuHGqqKjo4Yq8w/TXzdT+TO2rjcn90Zt/8rfeuGi5G2VlZen+++9XYmKi4uLiFBISosbGRjmdTm3btk2/+93vfF3iaWk72MeMGXPSWG882DvK9NfN1P5M7auNyf3Rm3/yt964W3o3+/rrr/XGG2/I6XSqvr5eDofDcwX7hRde6OvyTsvHH3/8gwf7Nddc4+syu8Tk100ytz9T+2pjcn/05p/8qTcCD06LPx3sAIAzF9fw+NDbb7/t6xJO25AhQzR79mw999xz+s///E8tXbpUDz74oNFhx4TX7Z8xtT9T+2pjcn/05p96W298wuND8fHx2rFjh6/L6DZvv/22brrpJl+X4XWmv26m9mdqX21M7o/e/FNv643A00NcLpfq6+sVGhqqfv36+bqcHtHbDnZ8v7q6OjkcDgUG8nsM/qShoUFBQUEKCgrydSle19jYKMuyFBIS4utSYAhOaXUjt9ut5cuXa8yYMRo+fLhGjx6t4cOH6/rrr9fvfvc7mZ41/TXsHDlyRPfcc4+GDx+uGTNmyOl0thsfNmyYjyrzjmPHjum5557Tyy+/rKamJs2cOVNJSUkaNmyYFi5cqObmZl+X6FVJSUm+LsErHn74Yc/fjx49qnvuuUeJiYmKj4/XE088oePHj/uwutNz6NAh3XPPPdq7d69qa2t1zz33KCEhQYmJibr//vvlcrl8XWKXXXHFFb3u1I63HD9+XAUFBSosLJQk5efn6+qrr9aoUaP09NNPq6WlxbcFfgef8HSj3Nxcbd68WRkZGYqJiZHD4fD8FlNBQYGuvfZazZkzx9dl4jsefvhh1dTU6JZbbtG7776rjz/+WCtXrlRMTIwk///k6sknn9TWrVt1/PhxDRo0SDabTVlZWTp+/LieeuopjRw5Uvfff7+vy+y0Rx999JTPr1mzRhMmTJAkLVq0qCdL8qphw4bps88+kyQ98cQT+vrrrzVnzhwdO3ZMTz/9tK644go99thjPq6ya2bNmqWAgAAtXLhQeXl5qqys1OzZs9Xa2qolS5bonHPO8dvX7rLLLtPAgQM1evRoPfLII0Z9YpWbm6uSkhLZ7Xb9+Mc/VllZme69914FBATo+eef19ixY/XAAw/4ukwPAk83SklJUVFRkc4///yTxsrLy3Xrrbdq06ZNPqjMO0pLS39wmeHDh/dAJd41cuRIrV27VmFhYZKkZ599Vm+//bZef/11hYWFtfuPxx+NHDlSb7zxhg4fPqxJkyZp48aNOvvssyVJ33zzjf7t3/5N77//vo+r7LzbbrtNn332mW64
4QYFBwd7nv/Ha8n89T9NqX3QHj16tFatWqXo6GhJJ74EdNq0aX77fpKcnKwPP/xQISEhGjVqlN58800NHDhQknT48GHdeOON2rp1q4+r7Jphw4bpnXfe0Zw5c/S3v/1NGRkZmjp1artj1F+NGjVKxcXFsixL119/vd5++20NHTpU0on/46ZPn64PPvjAx1X+f5yw70YtLS0aNGjQKccGDhyo1tbWHq7Iu+bOnavy8vLvPTVns9n01Vdf9XBVp6+5ubnddVYPPvig/va3v+n//J//oxdffNHvT0U2NjYqMjJSkZGRGjRokCfYSdKgQYNUV1fnw+q67r/+67/0H//xH3rvvff09NNP66KLLpIkvf/++34ddNrYbDbP3+12uyIiIjyPo6Ki1NTU5IuyvMbtdkuSHA5HuzAQHBzs99coRUVF6aWXXtIbb7yhgoICPfPMM0pNTVViYqKioqI0atQoX5fYJU1NTYqKipLb7VZAQIAuuOACz9h5553X695LuIanGyUlJSk7O1tVVVXtnj98+LCeeOIJJScn+6gy71i1apUGDx6sZ599Vrt37z7pjz+GHUm69NJLVVBQ0C7YLFq0SN98843fnjL4R0OHDtUbb7wh6cSXR/bp00fSiYD+zDPP6PLLL/dhdV1nt9v1wAMP6LHHHlNmZqZWrlzp65K86tixY3rsscf08ssvKzY2VmvXrvWMFRYWKjY21ofVnZ7rrrtOjz32mOrr6/Wv//qveuqpp9Tc3Kz6+no9+uijfv9e2Wby5Ml699139dxzz8nhcOill17qVad8Ouuiiy7SypUrtWLFCrndbr3++uuesRdeeKHXfT0Jp7S60eHDh/XAAw9o+/btCgsL83wTcU1NjRISEpSfn+/52NZfffrpp5ozZ47ee+892e1m5Ofdu3dr5syZ+slPfqI//OEPnuf379+v6dOn68CBA34b5iRp8+bNuueee7R58+Z21xPceOONOnbsmJYtW+b5WNpfVVdX66GHHlLfvn21bds2bd++3dclnbZ169bpiy++0Jdffqldu3bp0ksv1R//+Ef95je/0YoVK7Rs2TIlJib6uswuqa2t1b333qsvvvhCQ4cO1Z49eyRJlmXp/PPP14oVKzynXf2Nv1/z98/8z//8j+6++25VVVVp+vTp6tu3rzZs2KDjx4/r0KFDev7555WSkuLrMj0IPD1g//79KisrU319vUJCQhQbG6sf/ehHvi7La9544w2NGjVKZ511lq9L8Zpjx46poqJCP/7xj9s9f/ToUb3++uuaMWOGbwrzksOHD58Utnfs2KGLLrrImIsqLcvS888/r7feekvvvPOOr8vxKsuydPjwYZ111lnas2ePwsPDFRUV5euyTtuXX36pL774QrW1terTp4+GDh2qa665xq+/LuEfL5o3kWVZqq2tVXh4uCzL0oYNG1RRUaFRo0b1uh+cCDwAAMB4ZpyDAAAA+CcIPAAAwHgEHgAAYDwCDwAAMB6BBwAAGI/AA6DbffDBB7r11luVkpKiK6+8Ur/85S/19ddfS5LWrl2rcePGKTExUXfccYcef/xxPfLII5JO/MrrSy+95Bn/xS9+oS+//LJDc/7973/XRRddpLy8PA0fPly/+tWvdPz4cS1evFg33nij4uPjlZKSooULF3q+ZPK2227Tb37zG6Wnpys+Pl433nij1q1b126bd9xxh4YNG6Z/+Zd/UWFhoecbnSVp165duu222zR8+HDdcMMNKiws9Ptv5gaMYQFAN/r222+tyy67zHr//fcty7Ksw4cPW7/4xS+srKws67PPPrMuvfRS6/3337eam5ut9evXW5dccon18MMPW5ZlWStWrLBGjx5tffXVV9bx48etoqIiKzEx0Tp06NAPzlteXm7FxcVZ2dnZ1rFjx6za2lrrD3/4gzV+/Hjr4MGDlmVZ1meffWZdcskl1n//939blmVZv/zlL62kpCRr165d1rFjx6xnnnnGSkhIsJqamqyWlhbr
Zz/7mfXII49Y9fX11t///ndr0qRJVlxcnGVZlnXgwAErISHBWrFihXX8+HGrrKzMSk1NtV555ZXu2K0AOolPeAB0q4EDB2rt2rUaM2aMXC6XDhw4oIiICB08eFCvvfaabrjhBo0ZM0aBgYFKTU3VT3/6U8+6K1eu1N13362LL75YQUFBSktL09ChQ/XWW291eP7JkyerT58+GjBggG6++WYVFhbq7LPPVmVlpZqamhQaGqqDBw96lh83bpwuueQS9enTRz//+c9VV1en6upq/eUvf9HXX3+txx9/XCEhITrvvPP04IMPetZ76623NHToUKWnpysoKEgxMTG64447jLvFBeCv/PfrKwH4haCgIL399ttatWqVbDab4uLi5HK5FBgYqG+//VaXXHJJu+UHDx7suf/cN998o8WLF+vpp5/2jLe0tOiyyy7r8Pz/eAPfxsZGLViwQKWlpYqOjtYll1wiy7I8N66U1O4WBm3f8Ot2uz1B7R+/ifr888/3/P2bb77Rrl272t3eoe2migB8j8ADoFu98847WrFihV555RXPLVUWLlyov/71rzrvvPNUUVHRbvmKigrPDU2jo6N1//33a/z48Z7x/fv3Kzw8vMPz/+NdxrOzsxUWFqZPPvlEffv2ldvt1vDhwzu0nXPPPVeHDx9WY2OjHA6Hp9Y20dHRSk5O1osvvuh57siRI6qvr+9wrQC6D6e0AHSruro62e12BQcHy7Isbdy4UW+88Yaam5s1bdo0bdiwQSUlJWptbdXHH3+s9evXe9a9+eabVVBQoL1790qSSkpKNH78eJWWlnapFpfLpb59+8put8vlcumpp56Sy+VSc3PzD6575ZVXKiYmRnl5eWpsbNTBgweVn5/vGZ8wYYL+8pe/6K233lJLS4sqKyt1zz33KC8vr0u1AvAuPuEB0K1+/vOf69NPP9X48eMVEBCgCy+8UNOnT9fKlSt10UUX6Ve/+pXmz5+vI0eOKDExUSkpKQoKCpIkzZgxQ5ZlKTMzU5WVlYqKitITTzyhsWPHdqmW7OxsPfHEE0pKSlJoaKhGjx6tUaNG6a9//esPrmu325Wfn6958+YpJSVF0dHRGjNmjL766itJ0nnnnacXXnhBTz/9tJ588kkFBARo9OjRmjt3bpdqBeBd3DwUgM/87//+r9xud7u7Kt9333268MIL210Q3Bs0NTVpx44dSkpK8lyX88EHH2jevHkqKSnxcXUAfgintAD4jNPp1PTp07V//35J0tatW1VSUqLrrrvOx5WdLCgoSLNnz9arr74qt9ut6upqLV++XNdff72vSwPQAXzCA8CnCgoK9Kc//Um1tbU677zzdPfdd2vChAk/uF5ycrKOHz/+veNr167Vueee681StX37dj311FPau3ev+vbtq3HjxmnOnDntfnMLQO9E4AEAAMbjlBYAADAegQcAABiPwAMAAIxH4AEAAMYj8AAAAOMReAAAgPEIPAAAwHgEHgAAYDwCDwAAMN7/BbXh96HuLq2yAAAAAElFTkSuQmCC"
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Bar chart of how many users fall into each age_range code, ordered by code.\n",
    "tags = user_info['age_range'].value_counts().sort_index()\n",
    "ax = tags.plot.bar(title='Number of users per age_range code')\n",
    "# Label the y axis so the figure stands alone; the trailing semicolon hides the text repr.\n",
    "ax.set_ylabel('user count');"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-24T11:26:54.346457400Z",
     "start_time": "2024-09-24T11:26:49.184031600Z"
    }
   },
   "id": "66cccbdca1823d35",
   "execution_count": 7
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "data": {
      "text/plain": "<Axes: xlabel='gender'>"
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": "<Figure size 640x480 with 1 Axes>",
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjwAAAG4CAYAAABBxj3JAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8fJSN1AAAACXBIWXMAAA9hAAAPYQGoP6dpAAArFUlEQVR4nO3df1DUd2L/8RcLJiyIsEgOdU7PRsDrxcmJIIgmacWjmY5VCeBph8voZdQKJBdNJNectKY6oCZTY0hP2mosTfUmd6DeHcbkbK6XQIwgZxnN2GBYM6ckjD9AEXaVysJ+/7DsN0StkCIffX+ejxlmZN/72c/7zWeXfbL7QYL8fr9fAAAABnNYPQEAAIA7jeABAADGI3gAAIDxCB4AAGA8ggcAABiP4AEAAMYjeAAAgPEIHgAAYDyCBwAAGC/E6gncTdraOmWn/3c6KEgaPTrCduu2K463vXC87cWux7tv3QNB8HyJ3y9b3VH62HXddsXxtheOt71wvG+Nt7QAAIDxCB4AAGA8ggcAABiP4AEAAMYjeAAAgPEIHgAAYDyCBwAAGI/gAQAAxiN4AACA8QgeAABgPIIHAAAYj+ABAADGI3gAAIDxCB4AAGC8EKsnANiNwxEkhyPIsv0HB1vzc05vr1+9vX5L9g0ABA8wjByOIEVGhSnEouiQJJcr3JL9+np6dbn9CtEDwBIEDzCMHI4ghQQ79OxbDXKf91g9nWET942Rem1xohyOIIIHgCUIHsAC7vMenWjpsHoaAGAbnLQMAACMR/AAAADjETwAAMB4BA8AADAewQMAAIxH8AAAAOMRPAAAwHgEDwAAMB7BAwAAjEfwAAAA4xE8AADAeAQPAAAwHsEDAACMR/AAAADjETwAAMB4BA8AADAewQMAAIxH8AAAAOMRPAAAwHgEDwAAMB7BAwAAjEfwAAAA4xE8AADAeAQPAAAwHsEDAACMR/AAAADjETwAAMB4BA8AADAewQMAAIxH8AAAAOMRPAAAwHgEDwAAMB7BAwAAjEfwAAAA4xE8AADAeAQPAAAwHsEDAACMN6jgaWxs1A9/+EOlpKRo1qxZeuGFF3Tx4kVJ0rp16zRlyhQlJiYGPn7+858Htt23b58yMjI0depUZWVlqaGhITDW09OjzZs3a+bMmUpMTFReXp7Onz8fGG9ra1N+fr6Sk5OVmpqq4uJi+Xy+wPixY8e0cOFCJSYmKj09XRUVFV/7CwIAAMwz4ODp6urSsmXLlJiYqA8//FD79+9Xe3u7fvKTn0iSPv74Y23YsEENDQ2Bj0WLFkmS6urqtGHDBm3atEn19fWaP3++8vLydPXqVUlSWVmZDh06pD179qimpkahoaEqKioK7HvVqlUKCwtTTU2NKisrdfjwYZWXl0uSLl++rBUrVigzM1P19fUqLi7Wxo0bdfz48aH6GgEAgHvcgIOnpaVF3/72t1VQUKD77rtPLpdLixYtUn19va5du6ZPP/1UU6ZMuem2FRUVmjt3rpKSkjRixAgtXbpULpdLBw4cCIwvX75cY8eO1ciRI7V27VpVV1erublZp0+f1pEjR1RYWCin06nx48crPz9fu3fvliQdPHhQUVFRys3NVUhIiNLS0jRv3rzAOAAAQMhAr/jggw9qx44d/S77zW9+o4ceekiNjY3y+XwqLS3V0aNHFRERoezsbC1btkwOh0Nut1vZ2dn9to2Li1NjY6M6Ozt19uxZJSQkBMZiYmIUGRmpkydPSpKioqIUGxsbGJ80aZJaWlrU0dGhpqamftv23XZlZeXAvwr/Iyho0Jvc0/rWa7d1w1rc34YHj297sevxHsx6Bxw8X+b3+7V161b97ne/065du9Ta2qqUlBQ9+eST2rJliz755BMVFBTI4XBo2bJl8nq9cjqd/W4jNDRUV65ckdfrlSSFhYXdMN439tVt+z7v2/5Wtz1Yo0dHDHobE9h13Rh+Lle41VOwHR7f9sLxvrVBB4/H
49GLL76oEydOaNeuXZo8ebImT56sWbNmBa7z8MMPa8mSJTpw4ICWLVsmp9Oprq6ufrfT1dUll8sViJW+83m+PB4eHi6/33/DWN/n4eHhcjqd6uzsvOm2g9XW1im/f9Cb3bOCgq4/OOy2bisFBzts/aR/6ZJXPT29Vk/DFnh824tdj3ffugdiUMFz5swZLV++XOPGjVNlZaWio6MlSe+9955aW1u1ePHiwHWvXbum0NBQSVJ8fLyampr63Zbb7dZjjz2myMhIxcbGyu12B96aunDhgtrb25WQkKDe3l61t7ertbVVMTExkqRTp05pzJgxioiIUEJCgg4dOnTDbcfHxw9maZIkv1+2uqP0seu6YQ3ua8OLx7e9cLxvbcAnLV++fFlLlizRtGnT9MYbbwRiR7r+FtfGjRt1+PBh+f1+NTQ06M033wz8llZOTo6qqqpUW1ur7u5ulZeXq62tTRkZGZKkrKwslZWVqbm5WR6PRyUlJUpJSdGECRM0ceJEJSUlqaSkRB6PR83Nzdq2bZtycnIkSRkZGWptbVV5ebm6u7tVW1urqqqqG84ZAgAA9jXgV3j27t2rlpYWvfPOO3r33Xf7jTU0NOjFF1/USy+9pHPnzikmJkbPPPOMFixYIElKS0vTunXrAuNxcXHavn27oqKiJEkFBQXy+XzKzc2V1+tVamqqtm7dGrj90tJSrV+/XnPmzJHD4VBmZqby8/MlSS6XSzt37lRxcbFKS0sVHR2toqIizZgx4//4pQEAAKYI8vt58atPa6v93vuMiYmw3bqtFBJy/RyeuaU1OtHSYfV0hs1D40bp7R89qkuXvPL5OIdnOPD4the7Hu++dQ8Ef1oCAAAYj+ABAADGI3gAAIDxCB4AAGA8ggcAABiP4AEAAMYjeAAAgPEIHgAAYDyCBwAAGI/gAQAAxiN4AACA8QgeAABgPIIHAAAYj+ABAADGI3gAAIDxCB4AAGA8ggcAABiP4AEAAMYjeAAAgPEIHgAAYDyCBwAAGI/gAQAAxiN4AACA8QgeAABgPIIHAAAYj+ABAADGI3gAAIDxCB4AAGA8ggcAABiP4AEAAMYjeAAAgPEIHgAAYDyCBwAAGI/gAQAAxiN4AACA8QgeAABgPIIHAAAYj+ABAADGI3gAAIDxCB4AAGA8ggcAABiP4AEAAMYjeAAAgPEIHgAAYDyCBwAAGI/gAQAAxiN4AACA8QgeAABgPIIHAAAYj+ABAADGI3gAAIDxCB4AAGC8QQVPY2OjfvjDHyolJUWzZs3SCy+8oIsXL0qSjh07poULFyoxMVHp6emqqKjot+2+ffuUkZGhqVOnKisrSw0NDYGxnp4ebd68WTNnzlRiYqLy8vJ0/vz5wHhbW5vy8/OVnJys1NRUFRcXy+fzBcZvt28AAGBvAw6erq4uLVu2TImJifrwww+1f/9+tbe36yc/+YkuX76sFStWKDMzU/X19SouLtbGjRt1/PhxSVJdXZ02bNigTZs2qb6+XvPnz1deXp6uXr0qSSorK9OhQ4e0Z88e1dTUKDQ0VEVFRYF9r1q1SmFhYaqpqVFlZaUOHz6s8vJySbrtvgEAAAYcPC0tLfr2t7+tgoIC3XfffXK5XFq0aJHq6+t18OBBRUVFKTc3VyEhIUpLS9O8efO0e/duSVJFRYXmzp2rpKQkjRgxQkuXLpXL5dKBAwcC48uXL9fYsWM1cuRIrV27VtXV1Wpubtbp06d15MgRFRYWyul0avz48crPzw/c9u32PRhBQfb7sOu6rfx6253Vx8BOH3y97fVh1+M9UCEDveKDDz6oHTt29LvsN7/5jR566CE1NTUpISGh31hcXJwqKyslSW63W9nZ2TeMNzY2qrOzU2fPnu23fUxMjCIjI3Xy5ElJUlRUlGJjYwPjkyZNUktLizo6Om6778EYPTpi0NuYwK7rxvBzucKtnoLt8Pi2F473rQ04eL7M7/dr69at+t3vfqddu3bpzTfflNPp7Hed0NBQXbly
RZLk9XpvOe71eiVJYWFhN4z3jX11277P+7b/3/Y9GG1tnfL7B73ZPSso6PqDw27rtlJwsMPWT/qXLnnV09Nr9TRsgce3vdj1ePeteyAGHTwej0cvvviiTpw4oV27dmny5MlyOp3q7Ozsd72uri6Fh1//xu50OtXV1XXDuMvlCsRK3/k8X93e7/ffMNb3eXh4+G33PRh+v2x1R+lj13XDGtzXhhePb3vheN/aoH5L68yZM8rOzpbH41FlZaUmT54sSUpISFBTU1O/67rdbsXHx0uS4uPjbzkeGRmp2NhYud3uwNiFCxfU3t6uhIQExcfHq729Xa2trYHxU6dOacyYMYqIiLjtvgEAAAYcPJcvX9aSJUs0bdo0vfHGG4qOjg6MZWRkqLW1VeXl5eru7lZtba2qqqoC5+3k5OSoqqpKtbW16u7uVnl5udra2pSRkSFJysrKUllZmZqbm+XxeFRSUqKUlBRNmDBBEydOVFJSkkpKSuTxeNTc3Kxt27YpJydnQPsGAAAI8vsH9uLXv/zLv2jTpk1yOp0K+spp0Q0NDfr4449VXFysTz/9VNHR0crPz1dWVlbgOr/61a9UVlamc+fOKS4uTkVFRfrud78rSeru7tZrr72mX//61/J6vUpNTdWGDRs0evRoSVJra6vWr1+vuro6ORwOZWZmas2aNQoODpak2+57oFpb7ffeZ0xMhO3WbaWQkOvn8MwtrdGJlg6rpzNsHho3Sm//6FFduuSVz8c5PMOBx7e92PV49617QNcdaPDYgV3vKHZbt5UIHoJnuPD4the7Hu/BBA9/WgIAABiP4AEAAMYjeAAAgPEIHgAAYDyCBwAAGI/gAQAAxiN4AACA8QgeAABgPIIHAAAYj+ABAADGI3gAAIDxCB4AAGA8ggcAABiP4AEAAMYjeAAAgPEIHgAAYDyCBwAAGI/gAQAAxiN4AACA8QgeAABgPIIHAAAYj+ABAADGI3gAAIDxCB4AAGA8ggcAABiP4AEAAMYjeAAAgPEIHgAAYDyCBwAAGI/gAQAAxiN4AACA8QgeAABgPIIHAAAYj+ABAADGI3gAAIDxCB4AAGA8ggcAABiP4AEAAMYjeAAAgPEIHgAAYDyCBwAAGI/gAQAAxiN4AACA8QgeAABgPIIHAAAYj+ABAADGI3gAAIDxCB4AAGA8ggcAABiP4AEAAMYjeAAAgPG+dvBcvHhRGRkZqqurC1y2bt06TZkyRYmJiYGPn//854Hxffv2KSMjQ1OnTlVWVpYaGhoCYz09Pdq8ebNmzpypxMRE5eXl6fz584HxtrY25efnKzk5WampqSouLpbP5wuMHzt2TAsXLlRiYqLS09NVUVHxdZcGAAAM87WC5+jRo1q0aJHOnDnT7/KPP/5YGzZsUENDQ+Bj0aJFkqS6ujpt2LBBmzZtUn19vebPn6+8vDxdvXpVklRWVqZDhw5pz549qqmpUWhoqIqKigK3vWrVKoWFhammpkaVlZU6fPiwysvLJUmXL1/WihUrlJmZqfr6ehUXF2vjxo06fvz411keAAAwzKCDZ9++fVqzZo1Wr17d7/Jr167p008/1ZQpU266XUVFhebOnaukpCSNGDFCS5culcvl0oEDBwLjy5cv19ixYzVy5EitXbtW1dXVam5u1unTp3XkyBEVFhbK6XRq/Pjxys/P1+7duyVJBw8eVFRUlHJzcxUSEqK0tDTNmzcvMA4AAOwtZLAbPPLII5o3b55CQkL6RU9jY6N8Pp9KS0t19OhRRUREKDs7W8uWLZPD4ZDb7VZ2dna/24qLi1NjY6M6Ozt19uxZJSQkBMZiYmIUGRmpkydPSpKioqIUGxsbGJ80aZJaWlrU0dGhpqamftv23XZlZeWg1hYUNKir3/P61mu3dcNa3N+GB49ve7Hr8R7MegcdPA888MBNL+/s7FRKSoqefPJJbdmyRZ988okKCgrkcDi0bNkyeb1eOZ3OftuEhobqypUr8nq9kqSwsLAbxvvGvrpt
3+d929/qtgdj9OiIQV3fFHZdN4afyxVu9RRsh8e3vXC8b23QwXMrs2bN0qxZswKfP/zww1qyZIkOHDigZcuWyel0qqurq982XV1dcrlcgVjpO5/ny+Ph4eHy+/03jPV9Hh4eLqfTqc7OzptuOxhtbZ3y+we1yT0tKOj6g8Nu67ZScLDD1k/6ly551dPTa/U0bIHHt73Y9Xj3rXsghix43nvvPbW2tmrx4sWBy65du6bQ0FBJUnx8vJqamvpt43a79dhjjykyMlKxsbFyu92Bt6YuXLig9vZ2JSQkqLe3V+3t7WptbVVMTIwk6dSpUxozZowiIiKUkJCgQ4cO3XDb8fHxg1qD3y9b3VH62HXdsAb3teHF49teON63NmT/D4/f79fGjRt1+PBh+f1+NTQ06M033wz8llZOTo6qqqpUW1ur7u5ulZeXq62tTRkZGZKkrKwslZWVqbm5WR6PRyUlJUpJSdGECRM0ceJEJSUlqaSkRB6PR83Nzdq2bZtycnIkSRkZGWptbVV5ebm6u7tVW1urqqqqG84ZAgAA9jRkr/BkZGToxRdf1EsvvaRz584pJiZGzzzzjBYsWCBJSktL07p16wLjcXFx2r59u6KioiRJBQUF8vl8ys3NldfrVWpqqrZu3Rq4/dLSUq1fv15z5syRw+FQZmam8vPzJUkul0s7d+5UcXGxSktLFR0draKiIs2YMWOolgcAAO5hQX4/L371aW2133ufMTERtlu3lUJCrp/DM7e0RidaOqyezrB5aNwovf2jR3Xpklc+H+fwDAce3/Zi1+Pdt+6B4E9LAAAA4xE8AADAeAQPAAAwHsEDAACMR/AAAADjETwAAMB4BA8AADAewQMAAIxH8AAAAOMRPAAAwHgEDwAAMB7BAwAAjEfwAAAA4xE8AADAeAQPAAAwHsEDAACMR/AAAADjETwAAMB4BA8AADAewQMAAIxH8AAAAOMRPAAAwHgEDwAAMB7BAwAAjEfwAAAA4xE8AADAeAQPAAAwHsEDAACMF2L1BADAZA5HkByOIMv2Hxxszc+1vb1+9fb6Ldk3cDMEDwDcIQ5HkCKjwhRiUXRIkssVbsl+fT29utx+hejBXYPgAYA7xOEIUkiwQ8++1SD3eY/V0xk2cd8YqdcWJ8rhCCJ4cNcgeADgDnOf9+hES4fV0wBsjZOWAQCA8QgeAABgPIIHAAAYj+ABAADGI3gAAIDxCB4AAGA8ggcAABiP4AEAAMYjeAAAgPEIHgAAYDyCBwAAGI/gAQAAxiN4AACA8QgeAABgPIIHAAAYj+ABAADGI3gAAIDxCB4AAGA8ggcAABiP4AEAAMb72sFz8eJFZWRkqK6uLnDZsWPHtHDhQiUmJio9PV0VFRX9ttm3b58yMjI0depUZWVlqaGhITDW09OjzZs3a+bMmUpMTFReXp7Onz8fGG9ra1N+fr6Sk5OVmpqq4uJi+Xy+Ae8bAADY19cKnqNHj2rRokU6c+ZM4LLLly9rxYoVyszMVH19vYqLi7Vx40YdP35cklRXV6cNGzZo06ZNqq+v1/z585WXl6erV69KksrKynTo0CHt2bNHNTU1Cg0NVVFRUeD2V61apbCwMNXU1KiyslKHDx9WeXn5gPYNAADsbdDBs2/fPq1Zs0arV6/ud/nBgwcVFRWl3NxchYSEKC0tTfPmzdPu3bslSRUVFZo7d66SkpI0YsQILV26VC6XSwcOHAiML1++XGPHjtXIkSO1du1aVVdXq7m5WadPn9aRI0dUWFgop9Op8ePHKz8/P3Dbt9v3QAUF2e/Druu28uttd1YfA4738LL6GNjpw65f74EKGeyd95FHHtG8efMUEhLSL3qampqUkJDQ77pxcXGqrKyUJLndbmVnZ98w3tjYqM7OTp09e7bf9jExMYqMjNTJkyclSVFRUYqNjQ2MT5o0SS0tLero6Ljtvgdq9OiIQV3fFHZdN4afyxVu9RQwjDjew4/v57c26OB54IEHbnq51+uV0+ns
d1loaKiuXLly23Gv1ytJCgsLu2G8b+yr2/Z93rf9/7bvgWpr65TfP6hN7mlBQdcfHHZbt5WCgx22fhK4dMmrnp5eq6cxbDje9jreVrLr9/O+dQ/EoIPnVpxOpzo7O/td1tXVpfDw8MB4V1fXDeMulysQK33n83x1e7/ff8NY3+fh4eG33fdA+f2y1R2lj13XDWtwX7MXjvfw4vv5rQ3Zr6UnJCSoqamp32Vut1vx8fGSpPj4+FuOR0ZGKjY2Vm63OzB24cIFtbe3KyEhQfHx8Wpvb1dra2tg/NSpUxozZowiIiJuu28AAGBvQxY8GRkZam1tVXl5ubq7u1VbW6uqqqrAeTs5OTmqqqpSbW2turu7VV5erra2NmVkZEiSsrKyVFZWpubmZnk8HpWUlCglJUUTJkzQxIkTlZSUpJKSEnk8HjU3N2vbtm3KyckZ0L4BAIC9DdlbWi6XSzt37lRxcbFKS0sVHR2toqIizZgxQ5KUlpamdevW6aWXXtK5c+cUFxen7du3KyoqSpJUUFAgn8+n3Nxceb1epaamauvWrYHbLy0t1fr16zVnzhw5HA5lZmYqPz9/QPsGAAD2FuT3825fn9ZW+53sFRMTYbt1Wykk5PpJrHNLa3SipcPq6Qybh8aN0ts/elSXLnnl89nnJFaOt72Ot5Xs+v28b90DwZ+WAAAAxiN4AACA8QgeAABgPIIHAAAYj+ABAADGI3gAAIDxCB4AAGA8ggcAABiP4AEAAMYjeAAAgPEIHgAAYDyCBwAAGI/gAQAAxiN4AACA8QgeAABgPIIHAAAYj+ABAADGI3gAAIDxCB4AAGA8ggcAABiP4AEAAMYjeAAAgPEIHgAAYDyCBwAAGI/gAQAAxiN4AACA8QgeAABgPIIHAAAYj+ABAADGI3gAAIDxCB4AAGA8ggcAABiP4AEAAMYjeAAAgPEIHgAAYDyCBwAAGI/gAQAAxiN4AACA8QgeAABgPIIHAAAYj+ABAADGI3gAAIDxCB4AAGA8ggcAABiP4AEAAMYjeAAAgPEIHgAAYDyCBwAAGI/gAQAAxiN4AACA8QgeAABgvCENngMHDug73/mOEhMTAx+FhYWSpGPHjmnhwoVKTExUenq6Kioq+m27b98+ZWRkaOrUqcrKylJDQ0NgrKenR5s3b9bMmTOVmJiovLw8nT9/PjDe1tam/Px8JScnKzU1VcXFxfL5fEO5NAAAcA8b0uD5+OOPtWDBAjU0NAQ+XnnlFV2+fFkrVqxQZmam6uvrVVxcrI0bN+r48eOSpLq6Om3YsEGbNm1SfX295s+fr7y8PF29elWSVFZWpkOHDmnPnj2qqalRaGioioqKAvtdtWqVwsLCVFNTo8rKSh0+fFjl5eVDuTQAAHAPG/LgmTJlyg2XHzx4UFFRUcrNzVVISIjS0tI0b9487d69W5JUUVGhuXPnKikpSSNGjNDSpUvlcrl04MCBwPjy5cs1duxYjRw5UmvXrlV1dbWam5t1+vRpHTlyRIWFhXI6nRo/frzy8/MDtw0AABAyVDfU29urEydOyOl0aseOHerp6dGf/MmfaM2aNWpqalJCQkK/68fFxamyslKS5Ha7lZ2dfcN4Y2OjOjs7dfbs2X7bx8TEKDIyUidPnpQkRUVFKTY2NjA+adIktbS0qKOjQ6NGjRrwGoKCBr3se1rfeu22bliL+5u9cLyHh12/nw9mvUMWPBcvXtR3vvMdPf744yotLdWlS5f04x//WIWFhXrggQfkdDr7XT80NFRXrlyRJHm93luOe71eSVJYWNgN431jX9227/MrV64MKnhGj44Y8HVNYtd1Y/i5XOFWTwHDiOM9/Ph+fmtDFjwxMTH93kZyOp0qLCzU97//fWVlZamrq6vf9bu6uhQeHh647s3GXS5XIF76zuf56vZ+v/+Gsb7P+25/oNraOuX3D2qTe1pQ0PUHh93WbaXgYIetnwQuXfKqp6fX6mkMG463vY63lez6/bxv3QMxZMHT2Nio
/fv36/nnn1fQ/7zGdO3aNTkcDj388MP613/9137Xd7vdio+PlyTFx8erqanphvHHHntMkZGRio2NldvtDrytdeHCBbW3tyshIUG9vb1qb29Xa2urYmJiJEmnTp3SmDFjFBExuNL1+2WrO0ofu64b1uC+Zi8c7+HF9/NbG7KTlqOiorR7927t2LFDPp9PLS0teuWVV/TEE0/o8ccfV2trq8rLy9Xd3a3a2lpVVVUFztvJyclRVVWVamtr1d3drfLycrW1tSkjI0OSlJWVpbKyMjU3N8vj8aikpEQpKSmaMGGCJk6cqKSkJJWUlMjj8ai5uVnbtm1TTk7OUC0NAADc44bsFZ4xY8bon/7pn7RlyxaVlZXp/vvv19y5c1VYWKj7779fO3fuVHFxsUpLSxUdHa2ioiLNmDFDkpSWlqZ169bppZde0rlz5xQXF6ft27crKipKklRQUCCfz6fc3Fx5vV6lpqZq69atgX2XlpZq/fr1mjNnjhwOhzIzM5Wfnz9USwMAAPe4IL+fF7/6tLba773PmJgI263bSiEh18/pmFtaoxMtHVZPZ9g8NG6U3v7Ro7p0ySufzz7ndHC87XW8rWTX7+d96x4I/rQEAAAwHsEDAACMR/AAAADjETwAAMB4BA8AADAewQMAAIxH8AAAAOMRPAAAwHgEDwAAMB7BAwAAjEfwAAAA4xE8AADAeAQPAAAwHsEDAACMR/AAAADjETwAAMB4BA8AADAewQMAAIxH8AAAAOMRPAAAwHgEDwAAMB7BAwAAjEfwAAAA4xE8AADAeAQPAAAwHsEDAACMR/AAAADjETwAAMB4BA8AADAewQMAAIxH8AAAAOMRPAAAwHgEDwAAMB7BAwAAjEfwAAAA4xE8AADAeAQPAAAwHsEDAACMR/AAAADjETwAAMB4BA8AADAewQMAAIwXYvUEAAAwhcMRJIcjyLL9Bwdb8zpGb69fvb1+S/Y9UAQPAABDwOEIUmRUmEIsig5JcrnCLdmvr6dXl9uv3NXRQ/AAADAEHI4ghQQ79OxbDXKf91g9nWET942Rem1xohyOIIIHAAC7cJ/36ERLh9XTwFdw0jIAADAer/DcBTjJDQCAO4vgsRgnud3dJ7kBAMxA8FiMk9zu7pPcAABmIHjuEpzkBgDAncNJywAAwHjGBE9bW5vy8/OVnJys1NRUFRcXy+fzWT0tAABwFzAmeFatWqWwsDDV1NSosrJShw8fVnl5udXTAgAAdwEjguf06dM6cuSICgsL5XQ6NX78eOXn52v37t1WTw0AANwFjDhpuampSVFRUYqNjQ1cNmnSJLW0tKijo0OjRo0a0O04HJLfol8YemjcKDnvC7Zm5xZ4MOb//yq8w4jsHhyOt71wvO2F4z18ggbxX9gF+f1WPcUPnV/96ld69dVX9f777wcuO3PmjDIyMvTBBx9ozJgx1k0OAABYzoj2DgsL09WrV/td1vd5eLg1/6keAAC4exgRPPHx8Wpvb1dra2vgslOnTmnMmDGKiIiwcGYAAOBuYETwTJw4UUlJSSopKZHH41Fzc7O2bdumnJwcq6cGAADuAkacwyNJra2tWr9+verq6uRwOJSZmak1a9YoONg+J44BAICbMyZ4AAAAbsWIt7QAAAD+NwQPAAAwHsEDAACMR/AAAADjETwAAMB4BA8AADCeEX88FAP3+eef6+TJk7py5YrCw8MVHx+v8ePHWz0tAADuKILHJi5cuKCioiJVV1dr1KhRcjqdunr1qi5fvqzU1FS9+uqrio6OtnqaGELnz5/XL37xCzU2NvYL3AULFuhb3/qW1dMD8H/A43vw+I8HbaKgoED333+/1q5dq9GjRwcuv3DhgkpKSuTz+fT6669bOEMMpffff1+rVq3S9OnTFRcXp9DQUHV1dcntdqu+vl6vv/66Hn30UauniSHGk6A98Pj+eggem0hKSlJ1dfVN/3q8x+PR7NmzVV9fb8HMcCfMnTtXTz/9tP78z//8hrEDBw6o
rKxMVVVVFswMdwpPgvbB4/vr4S0tmwgNDZXH47lp8LS3tyssLMyCWeFOaWlp0eOPP37Tsccff1x/8zd/M8wzwp32yiuvaOPGjbd8Enz55ZcJHkPw+P56+C0tm5g/f77+6q/+SgcOHFBTU5M+//xzud1uvfPOO8rPz9eCBQusniKG0De/+U29//77Nx3793//d05UN9DtngRbWlqGeUa4U3h8fz28wmMThYWF2rZtm15++WWdPXtWQUFB8vv9io2NVVZWlgoKCqyeIobQmjVr9KMf/UjJyclKSEhQWFiYrl69KrfbrSNHjuinP/2p1VPEEOt7EkxPT79hjCdBs/D4/no4h8eGPB6PvF6vnE6nRo0aZfV0cIf84Q9/0C9/+Uu53e7A8e47gfXBBx+0enoYYh988MFtnwRnzZpl9TQxRHh8Dx7BAwCG4EkQuDWCB5KkadOm6T//8z+tngaGyf79+/UXf/EXVk8DwB3A4/vmOGkZkqS/+7u/s3oKGEb8Fof97N+/3+opYJjw+L45XuEBDNd3zlZ4eLhGjhxp9XRgkcTERDU0NFg9DcAyvMIDGKi3t1c7d+5Uenq6pk+frj/90z/V9OnTNXv2bP30pz8VP+fYD7FjjkuXLmnlypWaPn26li5dKrfb3W982rRpFs3s7savpQMG2rRpkw4fPqw1a9YoLi4u8LfT3G63ysrKdOXKFRUWFlo9TQBfw6ZNm+T3+7V582a9++67ys3N1e7duxUXFydJ/EBzC7ylZRPf+973bvsg+O1vfztMs8GdlpaWpoqKCn3zm9+8Yay5uVmLFy/WoUOHLJgZ7pSB/GmY6dOnD8NMcKc98sgjevvttxUZGSlJevXVV7V//37t3btXkZGR/BLKLfAKj00UFhbq+eef14oVK/gPyGzA5/PpG9/4xk3HoqOj1dPTM8wzwp22du1aNTc33/IHm6CgIH3yySfDPCvcCd3d3f3Ox1u9erU+++wzPffcc3rjjTd4hecWeIXHRrZv3676+nr98z//s9VTwR1WUFCg8PBwvfDCC4qJiQlcfvHiRRUXF8vn8+m1116zcIYYahcvXtTixYu1evXqm/49LZjjqaee0rRp01RQUKCgoCBJ1385IScnR9OmTdM777zDOVs3QfDYiM/n01NPPaVXXnlFsbGxVk8Hd9DFixf17LPP6ve//70iIyMD/+tue3u7kpKSVFpaqujoaKuniSF29OhRFRYW6r333pPDwe+kmKqxsVHLly/XH//xH/f7AfbMmTNasmSJzp49y6t5N0Hw2NjRo0eVlJRk9TRwB505c0ZNTU3yer0KCwtTfHy8vvWtb1k9LdxBv/zlL/Xoo49q9OjRVk8Fd9B///d/q6WlRX/0R3/U7/KOjg7t3btXS5cutWZidzGCx8Y4sQ0AYBe85mljtC4AwC4IHgAAYDyCx8ZWrlxp9RQAABgWnMMDAACMxys8AADAeAQPAAAwHsEDAACMR/AAAADjETwAbKuurk6TJ0+2ehoAhgHBAwAAjEfwALjr/Nd//Zf+8i//UomJiVqwYIHKysqUnp4uSfroo4+Uk5Oj5ORkzZ07V7/+9a8D2/31X/+1/vZv/1YrV65UYmKi5syZozfffDMwfv78ea1cuVLTpk3TnDlzdOjQoX77PXPmjFauXKnU1FTNnj1br776qq5duyZJ2rt3r7KysvTUU08pOTlZVVVVw/CVADBUCB4AdxWPx6Nly5ZpxowZqqur08svv6xf/OIXkq7/lei8vDytWLFCdXV12rBhg0pKSlRTUxPYfu/evXryySdVX1+v5cuXa9OmTTp37pwkafXq1QoJCVF1dbV27dql6urqwHZXrlzR0qVLFR8fr+rqav3sZz/TRx99pNdffz1wnRMnTmjevHn66KOPlJGRMUxfEQBDgeABcFf5j//4DwUHB+uZZ57Rfffdp8mTJ2vZsmWSpLfeektz5szRn/3Znyk4OFjTpk3T97//fe3evTuwfWpqqmbNmqWQ
kBBlZ2erp6dHZ86c0RdffKHf//73WrNmjUaOHKmxY8fq6aefDmz3/vvv69q1a3ruued0//33a+zYsXr22Wf73faIESO0YMEC3XfffQoNDR2+LwqA/7MQqycAAF929uxZjRs3Tg7H//95bPz48ZKkL774QrW1tUpOTg6M9fT0aMKECYHPH3jggcC/R4wYIUnq7e0NvMozbty4wPiXt/viiy908eJFTZ8+PXCZ3+9Xd3e32traArf95XkBuHcQPADuKuPGjVNLS4v8fr+CgoIkSS0tLZKkMWPG6IknntD69esD1z9//rwG8hdyxowZI0lqbm7WpEmTJF2Pqy+PT5gwQe+++27gMo/Ho7a2NkVHR0tSYD4A7j38qALgrpKeni6/369//Md/1LVr1/TZZ5/pjTfekCTl5ORo//79+vDDD9Xb26s//OEP+sEPfqCdO3fe9nbHjRunRx55RBs3btTly5d14cIF/cM//ENgfPbs2fJ6vdqxY4euXbumjo4O/fjHP9bq1asJHcAABA+Au0pYWJi2bdum3/72t0pJSdFzzz2nWbNmacSIEfrud7+rLVu2aMuWLZo+fbp+8IMfKD09Xc8///yAbvvv//7vFRERodmzZys7O1szZ84MjI0cOVLl5eWqq6vTY489pu9973tyOBwqKyu7U0sFMIz4a+kA7iqXLl3SZ599pqSkpMBl//Zv/6a3335bb731loUzA3Av4xUeAHeVnp4eLVmyRB988IEk6fPPP9fPfvYzzZ492+KZAbiX8QoPgLvOe++9p9dee02ff/65Ro0apSeeeEJPP/20QkL4PQsAXw/BAwAAjMdbWgAAwHgEDwAAMB7BAwAAjEfwAAAA4xE8AADAeAQPAAAwHsEDAACMR/AAAADj/T9cispj+AblBAAAAABJRU5ErkJggg=="
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Count users per gender value (value_counts skips NaN by default), order the counts by\n",
    "# gender value rather than by frequency, and draw them as a bar chart.\n",
    "tags = user_info['gender'].value_counts().sort_index()\n",
    "tags.plot.bar()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-24T11:27:09.336435300Z",
     "start_time": "2024-09-24T11:27:08.637972300Z"
    }
   },
   "id": "8f68754d573eff3c",
   "execution_count": 8
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "data": {
      "text/plain": "<Axes: xlabel='action_type'>"
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": "<Figure size 640x480 with 1 Axes>",
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAhUAAAG2CAYAAADIhHSjAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8fJSN1AAAACXBIWXMAAA9hAAAPYQGoP6dpAAAbjklEQVR4nO3deXDU9f348dduEAiHCAietNYDrEpVBKwVv1qtJyqgKNWq6ExpPcZSFdSqQ6eeUEe0astIKaOOWPBARKy39SqHIs7IYGmxjoKgIgQ0EcORfH5/+CPfb2qpWXgnS9jHY8YZs/nsfl677yV55rNXLsuyLAAAtlC+2AMAANsGUQEAJCEqAIAkRAUAkISoAACSEBUAQBKiAgBIQlQAAEmICgAgiaJGRUVFRRx77LExZ86cBm3fv3//OPjgg+v916NHj7jnnnsaeVIA4Ju0KNaO33zzzbj66qtj8eLFDT7Pk08+We/rO+64I1566aU455xzUo8HABSoKEcqHnvssRgxYkRcdtllX/vezJkzY/DgwdG7d+/o379/TJ8+/T9exuzZs+O+++6LO+64I9q2bdvYIwMA36AoUdGvX7947rnn4qSTTqp3+sKFC+Oiiy6Kn/3sZzFnzpy44YYb4uabb45XX3213nY1NTXx61//Oi666KLYY489mnByAGBTihIVXbp0iRYtvv7Iy+TJk+OYY46J4447LsrKyqJXr15x5plnxqRJk+pt98QTT8SaNWvivPPOa6qRAYBvULTnVPwnS5cujdmzZ0fv3r3rTqupqYlvfetb9bZ76KGHYsiQIdG6deumHhEA2IStKip23nnnGDRoUFx//fV1py1fvjyyLKv7esWKFTFv3rwYM2ZMMUYEADZhq3qfisGDB8eMGTPitddei9ra2nj//ffjnHPOiYkTJ9ZtM2/evOjatWt069atiJMCAP9uqzpSceCBB8bYsWNj7NixMXz48CgvL4+TTz45Lr/88rptlixZEjvttFMRpwQA/pNc9n8fWwAA2Exb1cMfAEDzJSoAgCREBQCQhKgAAJJo8ld/rFxZGaX01NBcLqJz5/Yld71LlfUuLda7tJTqem+83g3R5FGRZVFSi7FRqV7vUmW9S4v1Li3We9MKfvjjL3/5S+y3335x8MEH1/03cuTIxpgNAGhGCj5SMX/+/BgwYEDccsstjTEPANBMFXykYv78+XHAAQc0xiwAQDNW0JGK2traWLBgQZSXl8eECROipqYmjjzyyBgxYkR06NChQZeRy23WnM3Wxutbate7VFnv0mK9S0uprnch17egt+lesWJFDB8+PAYNGhT9+/ePVatWxVVXXRXl5eUxfvz4zZkVANhGbPFnf7z99ttx5plnxty5c6Ndu3bfuH2pvhSn1K53qbLepcV6l5ZSXe9Ge0npwoULY8aMGXHFFVdE7v8fD1m3bl3k8/lo2bJlgy6jVF+KU6rXu1RZ79JivUuL9d60gp6oucMOO8SkSZNiwoQJsWHDhli2bFnceuutMWjQoAZHBQCwbSooKnbeeee455574oUXXoi+ffvG6aefHj179oxRo0Y11nwAQDNR8PtU9O3bNyZPntwYswAAzZgPFAMAkhAVAEASogIASEJUAABJNPlHnxdLPp+LfL54761aVlacfqutzaK21guqAWh8JREV+XwuOuzQJloU6Rd7RETHjm2Lst8NNbXx2eo1wgKARlcyUdGiLB/DJ78V7y6vKvY4TWbvru3idz8+OPL5nKgAoNGVRFRs9O7yqliw7PNijwEA2yRP1AQAkhAVAEASogIASEJUAABJiAoAIAlRAQAkISoAgCREBQCQhKgAAJIQFQBAEqICAEhCVAAASYgKACAJUQEAJCEqAIAkRAUAkISoAACSEBUAQBKiAgBIQlQAAEmICgAgCVEBACQhKgCAJEQFAJCEqAAAkhAVAEASogIASEJUAABJiAoAIAlRAQAkISoAgCREBQCQ
hKgAAJIQFQBAEqICAEhCVAAASYgKACAJUQEAJCEqAIAkRAUAkISoAACSEBUAQBKiAgBIQlQAAEmICgAgCVEBACQhKgCAJEQFAJCEqAAAkhAVAEASogIASEJUAABJiAoAIAlRAQAksVlRUVNTE+eee25cffXVqecBAJqpzYqKu+++O+bOnZt6FgCgGSs4KmbNmhXPPvtsHHfccY0xDwDQTBUUFStXroxrr702brvttigvL2+smQCAZqhFQzesra2NkSNHxgUXXBD77rvvZu8wl9vss7IF3O5NY+Pt7PYuDda7tJTqehdyfRscFffcc0+0bNkyzj333M2ZqU7nzu236PwUrmPHtsUeoeS4n5cW611arPemNTgqHn/88Vi+fHn07t07IiKqq6sjIuL5558v6EmbK1dWRpYVOOUWKivLl/Qv1lWrvoiamtpij1EScrmvfuAU435O07PepaVU13vj9W6IBkfF008/Xe/rjS8nHT16dAGjRWRZlNRibC3c5k3L/by0WO/SYr03zZtfAQBJNPhIxb8r9AgFALBtc6QCAEhCVAAASYgKACAJUQEAJCEqAIAkRAUAkISoAACSEBUAQBKiAgBIQlQAAEmICgAgCVEBACQhKgCAJEQFAJCEqAAAkhAVAEASogIASEJUAABJiAoAIAlRAQAkISoAgCREBQCQhKgAAJIQFQBAEqICAEhCVAAASYgKACAJUQEAJCEqAIAkRAUAkISoAACSEBUAQBKiAgBIQlQAAEmICgAgCVEBACQhKgCAJEQFAJCEqAAAkhAVAEASogIASEJUAABJiAoAIAlRAQAkISoAgCREBQCQhKgAAJIQFQBAEqICAEhCVAAASYgKACAJUQEAJCEqAIAkRAUAkISoAACSEBUAQBKiAgBIQlQAAEmICgAgCVEBACQhKgCAJEQFAJCEqAAAkig4KmbNmhVnnHFG9OrVKw4//PC44YYborq6ujFmAwCakYKioqKiIn7+85/HWWedFXPnzo3HHnssXn/99Rg/fnxjzQcANBMtCtm4U6dOMXPmzGjXrl1kWRarV6+OtWvXRqdOnRprPgCgmSgoKiIi2rVrFxERRx55ZHzyySfRu3fvOO200xp8/lyu0D2Sgtu9aWy8nd3epcF6l5ZSXe9Crm8uy7Jsc3ZSXV0dn332WYwYMSJatWoVEyZM2JyLaVL973w1Fiz7vNhjNJn9d90+nvzFEcUeA4ASUfCRio1at24drVu3jpEjR8YZZ5wRn332WXTo0OEbz7dyZWVsXsZsvrKyfHTs2LZpd7oVWbXqi6ipqS32GCUhl4vo3Ll9Ue7nND3rXVpKdb03Xu+GKCgq5s2bF9dcc01Mnz49WrZsGRER69ati+222y7Ky8sbdBlZFiW1GFsLt3nTcj8vLda7tFjvTSvo1R89evSI6urquO2222LdunWxdOnSGDNmTAwePLguMgCA0lRQVLRt2zYmTJgQixYtisMPPzzOPffc+MEPfhDXXHNNY80HADQTBT+nYu+9946JEyc2xiwAQDPmbboBgCREBQCQhKgAAJIQFQBAEqICAEhCVAAASYgKACAJUQEAJCEqAIAkRAUAkISoAACSEBUAQBKiAgBIQlQAAEmICgAgCVEBACQhKgCAJEQFAJCEqAAAkhAVAEASogIASEJUAABJiAoAIAlRAQAkISoAgCREBQCQhKgAAJIQFQBAEqICAEhCVAAASYgKACAJUQEAJCEqAIAkRAUAkISoAACSEBUAQBKiAgBIQlQAAEmICgAgCVEBACQhKgCAJEQFAJCEqAAAkhAVAEASogIASEJUAABJiAoAIAlRAQAkISoAgCREBQCQhKgAAJIQFQBAEqICAEhCVAAASYgKACAJUQEAJCEqAIAkRAUAkISoAACSEBUAQBKiAgBIQlQAAEkUFBULFy6MCy64IPr27RuHH354XHnllVFRUdFYswEAzUiDo6K6ujp++tOfxsEHHxyvvfZazJgxI1avXh3XXHNN
Y84HADQTDY6KZcuWxb777huXXHJJtGzZMjp27BhDhgyJN954ozHnAwCaiRYN3XDPPfeMCRMm1DvtmWeeif333z/5UABA89PgqPi/siyLO+64I/7617/GAw88UNB5c7nN2SNbyu3eNDbezm7v0mC9S0uprnch17fgqKiqqopf/epXsWDBgnjggQeiR48eBZ2/c+f2he6SLdSxY9tij1By3M9Li/UuLdZ70wqKisWLF8ewYcNi1113jUceeSQ6depU8A5XrqyMLCv4bFukrCxf0r9YV636Impqaos9RknI5b76gVOM+zlNz3qXllJd743XuyEaHBWfffZZDB06NL7//e/HTTfdFPn85r3FRZZFSS3G1sJt3rTcz0uL9S4t1nvTGhwVU6dOjWXLlsVTTz0VTz/9dL3vvfXWW8kHAwCalwZHxQUXXBAXXHBBY84CADRj3qYbAEhCVAAASYgKACAJUQEAJCEqAIAkRAUAkISoAACSEBUAQBKiAgBIQlQAAEmICgAgCVEBACQhKgCAJEQFAJCEqAAAkhAVAEASogIASEJUAABJiAoAIAlRAQAkISoAgCREBQCQhKgAAJIQFQBAEqICAEhCVAAASYgKACAJUQEAJCEqAIAkRAUAkISoAACSEBUAQBKiAgBIQlQAAEmICgAgCVEBACQhKgCAJEQFAJCEqAAAkhAVAEASogIASEJUAABJiAoAIAlRAQAkISoAgCREBQCQhKgAAJIQFQBAEqICAEhCVAAASYgKACAJUQEAJCEqAIAkRAUAkISoAACSEBUAQBKiAgBIQlQAAEmICgAgCVEBACQhKgCAJEQFAJCEqAAAktjsqKioqIhjjz025syZk3IeAKCZ2qyoePPNN2PIkCGxePHi1PMAAM1UwVHx2GOPxYgRI+Kyyy5rjHkAgGaqRaFn6NevX5xyyinRokWLzQqLXK7gs5CA271pbLyd3d6lwXqXllJd70Kub8FR0aVLl0LPUk/nzu236PwUrmPHtsUeoeS4n5cW611arPemFRwVW2rlysrIsqbdZ1lZvqR/sa5a9UXU1NQWe4ySkMt99QOnGPdzmp71Li2lut4br3dDNHlUZFmU1GJsLdzmTcv9vLRY79JivTfN+1QAAEmICgAgiS16+OMf//hHqjkAgGbOkQoAIAlRAQAkISoAgCREBQCQhKgAAJIQFQBAEqICAEhCVAAASYgKACAJUQEAJCEqAIAkRAUAkISoAACSEBUAQBKiAgBIQlQAAEmICgAgCVEBACQhKgCAJEQFAJCEqAAAkhAVAEASogIASEJUAABJiAoAIAlRAQAkISoAgCREBQCQhKgAAJIQFQBAEqICAEhCVAAASYgKACAJUQEAJCEqAIAkRAUAkISoAACSEBUAQBKiAgBIQlQAAEmICgAgCVEBACTRotgDAGypfD4X+XyuaPsvKyvO32e1tVnU1mZF2Tf8J6ICaNby+Vx02KFNtCjSL/aIiI4d2xZlvxtqauOz1WuEBVsNUQE0a/l8LlqU5WP45Lfi3eVVxR6nyezdtV387scHRz6fK7mocGRq6yUqgG3Cu8urYsGyz4s9Bo3Mkamt+8iUqACg2XBkaus+MiUqAGh2HJnaOnlJKQCQhKgAAJIQFQBAEqICAEhCVAAASYgKACAJUQEAJCEqAIAkRAUAkISoAACSEBUAQBKiAgBIQlQAAEmICgAgiYKjYuXKlXHxxRdH796949BDD42bbropNmzY0BizAQDNSMFR8ctf/jLatGkTr776ajzyyCMxa9asuPfeexthNACgOSkoKj744IN4/fXXY+TIkVFeXh7dunWLiy++OCZNmtRY8wEAzUSLQjZetGhR7LDDDrHTTjvVnbbXXnvFsmXL4vPPP4/tt98++YCwOfL5XOTzuaLtv6ysOE9Xqq3NorY2K8q+AQqKii+++CLKy8vrnbbx6zVr1jQoKvL5iKxIP/P233X7KG9ZVpydF8GeO7at+/98CT0lN5fLxfYd2kSL
Iv1ij4jo2LHtN2/UCDbU1Mbnn62JrFj/yIrIv+/SYr2bTq6Av88Kioo2bdrEl19+We+0jV+3bduwH6KdOrUvZJdJ/XbwgUXbdzEV6xccTa9FWT46dWpX7DGKwr/v0mK9t04F9c4+++wTq1evjhUrVtSd9q9//St23nnnaN++eLEAABRfQVGxxx57xCGHHBI333xzVFVVxZIlS+IPf/hDDB48uLHmAwCaiVxW4IOvK1asiOuvvz7mzJkT+Xw+Bg4cGCNGjIiystJ5bAsA+LqCowIA4D8pwecMAwCNQVQAAEmICgAgCVEBACQhKgCAJERFI6mqqopPPvkkqqqqij0KAAlUVlbGhg0bij3GVk1UJFRbWxsTJ06Mo48+Ovr06RNHHXVU9OnTJ374wx/G73//+5L8PAbYVqxatSouvPDC6NOnT5x//vnx7rvv1vt+r169ijQZjWHt2rVx9913x4MPPhjV1dUxbNiw6Nu3b/Tq1StuuOGGWL9+fbFH3CoV9Nkf/HejR4+OWbNmxYgRI2LvvfeO8vLy+PLLL+Pdd9+NcePGxZo1a2LkyJHFHhPYDKNHj44sy2LMmDHx9NNPx09+8pOYNGlS7L333hER/mjYxtx6660xZ86cWLduXTz11FORy+ViypQpsW7duvjtb38b48aNi1/84hfFHnOr482vEjrssMPi4Ycfjt133/1r31uyZEn8+Mc/jr/97W9FmIzG8sYbb3zjNn369GmCSWhs/fr1iyeffDI6dOgQERG33357zJgxI6ZOnRodOnSIXr16xbx584o8Jan069cvpk2bFhUVFTFgwIB45ZVXokuXLhERsXTp0jjvvPPihRdeKPKUWx9HKhLasGFDdO3a9T9+r1OnTlFTU9PEE9HYrr322liyZMkm/0rN5XLx97//vYmnojGsX78+2rX730+Aveyyy+K9996Lyy+/PP70pz85UrGN+fLLL2PHHXeMHXfcMbp27VoXkxERXbt2jcrKyiJOt/XynIqE+vbtG9ddd129T3GNiKioqIhRo0bFoYceWqTJaCyTJ0+Obt26xe233x4LFy782n+CYtux//77x7hx4+rFwy233BJLly6Na665poiT0Rj22muvmDZtWkREvPzyy9GyZcuI+OqPx7Fjx0bPnj2LON3Wy8MfCVVUVMTw4cNj7ty50aFDh2jTpk18+eWXsXr16jjkkEPizjvvjE6dOhV7TBJ78803Y+TIkfH8889HPq/Tt1ULFy6MYcOGxXe/+90YP3583emLFy+OoUOHxscffywityGzZs2KCy+8MGbNmhVt2rSpO/3EE0+MtWvXxh//+MfYa6+9ijjh1klUNILFixfHokWL4osvvog2bdrEPvvsE9/+9reLPRaNaNq0aXHEEUdE586diz0KjWjt2rWxbNmy+M53vlPv9M8//zymTp0a559/fnEGo1FUVFR87Q/Bt956K3r06FEvNPhfogIASMKxWgAgCVEBACQhKgCAJEQFlKj333+/KPutrKyMioqKouwbaFyiAkrQO++8EyeffHLd16NGjYpRo0Y1yb6PPfbYWLRoUZPsC2ha3lETSlBlZWW9D0S6/vrrm2zfq1atarJ9AU3LS0qhmXvxxRdj/Pjx8cEHH8SaNWuiZ8+eceONN8Yee+wRTzzxRNxzzz2xdOnS2HnnnePSSy+Nnj17Rv/+/WPt2rXRpk2bmDhxYkyZMiUivvrQrIiIhx9+OO6999746KOPYrfddothw4bFqaeeGhER5557bhx00EExb968eOedd+ou96STTvrGWY8//vh4//33o1WrVnHppZfG1KlTY8CAAXHhhRfWbXPKKafE0KFDI5/Px+TJk2PfffeNGTNmRJs2beLss8+Oiy66KHK5XKxbty7GjRsX06dPj8rKyjjwwAPjuuuu854wUEwZ0Gx99NFH2QEHHJC98MILWZZlWUVFRXb22WdnI0aMyGbPnp0dcMAB2UsvvZTV1NRkL7/8crb//vtnixYt
ymbPnp1179697nKuuuqq7KqrrsqyLMseffTRrFevXtnMmTOzDRs2ZDNnzsx69eqVPfvss1mWZdk555yT9e3bN1uwYEG2du3abOzYsdkhhxySVVdXN2jm7t27Z7Nnz86yLMvGjx+fnXDCCXXfmz9/fnbQQQdlVVVV2aOPPpp17949u/nmm7Pq6urs7bffzvr27Zs99NBDWZZl2ejRo7OBAwdmixcvzqqrq7O77rorO/rooxs8B5Ce51RAM9apU6d48skn4+ijj46qqqr4+OOPo2PHjvHJJ5/EtGnT4rjjjosjjzwy8vl8/M///E88+OCDsdNOO/3Xy3z00UdjyJAhcdhhh0VZWVkcdthhMWTIkJg8eXLdNscff3zst99+0bJlyxg0aFBUVlbGypUrC55/4MCBsXjx4pg/f35EfPXOpCeccEK0bds2IiJ22GGHGDFiRLRq1Sp69uwZQ4YMienTp0eWZTF58uS4/PLLo1u3btGqVau45JJLYv369fHSSy8VPAeQhudUQDO23XbbxYwZM2Ly5MmRy+Wie/fuUVVVFS1atIjly5fHfvvtV2/7733ve994mStWrIhu3brVO2333XePF198se7rjR8BHRHRosVXP0Zqa2sLnr9Lly5xxBFHxOOPP173MMddd91V9/3ddtsttttuu7qvd9lll3jmmWeioqIi1qxZE8OHD6/3eSvr16+PpUuXFjwHkIaogGbsqaeeigceeCD+/Oc/1z2X4IYbboh//vOfscsuu8SyZcvqbT9x4sQ46KCD/utl7r777rF48eJ6py1ZsqReSKR0+umnx29+85s4/PDDo3379tGnT5+67y1fvjyyLItcLhcRER9++GHsuuuu0bFjx2jVqtXXrs977733jUdigMbj4Q9oxiorKyOfz0fr1q0jy7J45ZVXYtq0abF+/foYNGhQPPfcc/Haa69FbW1tvPrqq3HXXXdF+/bto1WrVnXn/3eDBw+OKVOmxKxZs6KmpiZmz54dU6ZMidNPPz3JzC1btqy336OOOipqamrizjvvjNNOO63etp9++mmMHz8+1q9fH2+//XY8/PDDccYZZ0Q+n4/BgwfHbbfdFh9//HHU1tbGY489FieffHJ88MEHSeYECudIBTRjgwYNijfffDP69+8fZWVlseeee8bQoUNj0qRJ0bNnzxgzZkyMGTMmli5dGrvttluMHTs29tlnn1izZk0ccsghccQRR8Tvfve7epd54oknRlVVVdx4442xbNmy2GmnneLKK6+MgQMHJpl5yJAhccUVV8T5558fl112WWy33XZx6qmnxv333x/jxo2rt22XLl3iww8/jH79+kXbtm1j+PDhda8yueqqq+Kuu+6Ks88+O1avXh3dunWLO++882sP+QBNx0tKgaK7//7745VXXokJEybUnTZ16tS4++676z2XA9i6efgDKJpPP/003n777bjvvvvirLPOKvY4wBby8AeQxMqVK+NHP/rRf93mrbfeqvf1Sy+9FDfeeGMMGDAgjjnmmMYcD2gCHv4AAJLw8AcAkISoAACSEBUAQBKiAgBIQlQAAEmICgAgCVEBACQhKgCAJEQFAJDE/wMU7rAu1OLTfwAAAABJRU5ErkJggg=="
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Distribution of user action types: number of log rows per action_type value,\n",
    "# ordered by the action_type value and drawn as a bar chart.\n",
    "tags = user_log['action_type'].value_counts().sort_index()\n",
    "tags.plot.bar()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-24T11:28:25.480669100Z",
     "start_time": "2024-09-24T11:28:24.906286800Z"
    }
   },
   "id": "691690cc08e86bb0",
   "execution_count": 9
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "data": {
      "text/plain": "   user_id  merchant_id  prob\n0   163968         4605   NaN\n1   360576         1581   NaN\n2    98688         1964   NaN\n3    98688         3645   NaN\n4   295296         3361   NaN",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>user_id</th>\n      <th>merchant_id</th>\n      <th>prob</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>163968</td>\n      <td>4605</td>\n      <td>NaN</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>360576</td>\n      <td>1581</td>\n      <td>NaN</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>98688</td>\n      <td>1964</td>\n      <td>NaN</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>98688</td>\n      <td>3645</td>\n      <td>NaN</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>295296</td>\n      <td>3361</td>\n      <td>NaN</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the test table from the working directory and preview its first rows.\n",
    "# The preview shows the columns user_id, merchant_id and prob, with prob empty (NaN).\n",
    "test = pd.read_csv('test_format1.csv')\n",
    "test.head()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-24T11:28:41.664406800Z",
     "start_time": "2024-09-24T11:28:41.323181600Z"
    }
   },
   "id": "940d24725d2d3f28",
   "execution_count": 10
  },
  {
   "cell_type": "code",
   "outputs": [],
   "source": [
    "# Rename user_log's seller_id column to merchant_id (in place) so the log shares the key name\n",
    "# used by the test table and by the later merges on ['user_id', 'merchant_id'].\n",
    "user_log.rename(columns={'seller_id':'merchant_id'},inplace=True)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-24T11:28:45.005623700Z",
     "start_time": "2024-09-24T11:28:44.866531400Z"
    }
   },
   "id": "9df793452e733728",
   "execution_count": 11
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "data": {
      "text/plain": "   user_id  merchant_id  label  age_range  gender\n0    34176         3906      0        6.0     0.0\n1    34176          121      0        6.0     0.0\n2    34176         4356      1        6.0     0.0\n3    34176         2217      0        6.0     0.0\n4   230784         4818      0        0.0     0.0",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>user_id</th>\n      <th>merchant_id</th>\n      <th>label</th>\n      <th>age_range</th>\n      <th>gender</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>34176</td>\n      <td>3906</td>\n      <td>0</td>\n      <td>6.0</td>\n      <td>0.0</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>34176</td>\n      <td>121</td>\n      <td>0</td>\n      <td>6.0</td>\n      <td>0.0</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>34176</td>\n      <td>4356</td>\n      <td>1</td>\n      <td>6.0</td>\n      <td>0.0</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>34176</td>\n      <td>2217</td>\n      <td>0</td>\n      <td>6.0</td>\n      <td>0.0</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>230784</td>\n      <td>4818</td>\n      <td>0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Attach each user's profile columns (age_range, gender) to the labelled training pairs.\n",
    "# how='left' keeps every row of train even when its user_id has no row in user_info; the\n",
    "# default inner join would silently drop such rows. Unmatched rows get NaN profile values.\n",
    "# This also matches the left joins used when the behaviour features are merged in later.\n",
    "data = pd.merge(train, user_info, on='user_id', how='left')\n",
    "data.head()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-24T11:28:50.238095100Z",
     "start_time": "2024-09-24T11:28:49.939898100Z"
    }
   },
   "id": "a4a491529289015f",
   "execution_count": 12
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 522341 entries, 0 to 522340\n",
      "Data columns (total 6 columns):\n",
      " #   Column       Non-Null Count   Dtype \n",
      "---  ------       --------------   ----- \n",
      " 0   user_id      522341 non-null  int32 \n",
      " 1   merchant_id  522341 non-null  int32 \n",
      " 2   label        522341 non-null  object\n",
      " 3   origin       522341 non-null  object\n",
      " 4   age_range    522341 non-null  int8  \n",
      " 5   gender       522341 non-null  int8  \n",
      "dtypes: int32(2), int8(2), object(2)\n",
      "memory usage: 13.0+ MB\n"
     ]
    }
   ],
   "source": [
    "# Print data's column names, non-null counts, dtypes and memory usage.\n",
    "# NOTE(review): the saved output of this cell carries execution_count 48 and lists an `origin`\n",
    "# column, so it appears to come from a different run than the neighbouring cells (12 and 13);\n",
    "# re-run the notebook top to bottom to refresh it.\n",
    "data.info()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-23T12:26:30.693623200Z",
     "start_time": "2024-09-23T12:26:30.573543500Z"
    }
   },
   "id": "8c8f19324e766e4d",
   "execution_count": 48
  },
  {
   "cell_type": "markdown",
   "source": [
    "# 选取的特征\n",
    "\n",
    "- 用户的年龄段(age_range)\n",
    "- 用户的性别(gender)\n",
    "- 某用户在该商家日志的总条数(total_logs)\n",
    "- 用户浏览的商品的数目，即浏览了多少个商品(unique_item_ids)\n",
    "- 用户浏览的商品种类的数目，即浏览了多少种商品(categories)\n",
    "- 用户浏览的天数(browse_days)\n",
    "- 用户单击的次数(one_clicks)\n",
    "- 用户添加购物车的次数(shopping_carts)\n",
    "- 用户购买的次数(purchase_times)\n",
    "- 用户收藏的次数(favourite_times)"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "f372c652392c9afc"
  },
  {
   "cell_type": "code",
   "outputs": [],
   "source": [
    "# Number of log records a user has with a merchant (total_logs).\n",
    "# Only item_id is counted: count() returns the non-null item_id values per\n",
    "# (user_id, merchant_id) pair, which is the one column the original selection kept, so the\n",
    "# per-group counts of every other log column are no longer computed and thrown away.\n",
    "# The result has the columns user_id, merchant_id, item_id (item_id holds the count).\n",
    "total_logs = user_log.groupby(['user_id','merchant_id'])['item_id'].count().reset_index()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-24T11:29:31.282032800Z",
     "start_time": "2024-09-24T11:28:55.820798500Z"
    }
   },
   "id": "f1ca6388f423e4f9",
   "execution_count": 13
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "data": {
      "text/plain": "   user_id  merchant_id  total_logs\n0        1          471           1\n1        1          739           1\n2        1          925           4\n3        1         1019          14\n4        1         1156           1",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>user_id</th>\n      <th>merchant_id</th>\n      <th>total_logs</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>1</td>\n      <td>471</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>1</td>\n      <td>739</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>1</td>\n      <td>925</td>\n      <td>4</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>1</td>\n      <td>1019</td>\n      <td>14</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>1</td>\n      <td>1156</td>\n      <td>1</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Give the per-pair count column (still called item_id) its feature name, then preview the table.\n",
    "total_logs = total_logs.rename(columns={\"item_id\": \"total_logs\"})\n",
    "total_logs.head()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-24T11:29:46.148132900Z",
     "start_time": "2024-09-24T11:29:45.993030Z"
    }
   },
   "id": "47f95ad6cda6838",
   "execution_count": 14
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "data": {
      "text/plain": "   user_id  merchant_id  label  age_range  gender  total_logs\n0    34176         3906      0        6.0     0.0          39\n1    34176          121      0        6.0     0.0          14\n2    34176         4356      1        6.0     0.0          18\n3    34176         2217      0        6.0     0.0           2\n4   230784         4818      0        0.0     0.0           8",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>user_id</th>\n      <th>merchant_id</th>\n      <th>label</th>\n      <th>age_range</th>\n      <th>gender</th>\n      <th>total_logs</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>34176</td>\n      <td>3906</td>\n      <td>0</td>\n      <td>6.0</td>\n      <td>0.0</td>\n      <td>39</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>34176</td>\n      <td>121</td>\n      <td>0</td>\n      <td>6.0</td>\n      <td>0.0</td>\n      <td>14</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>34176</td>\n      <td>4356</td>\n      <td>1</td>\n      <td>6.0</td>\n      <td>0.0</td>\n      <td>18</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>34176</td>\n      <td>2217</td>\n      <td>0</td>\n      <td>6.0</td>\n      <td>0.0</td>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>230784</td>\n      <td>4818</td>\n      <td>0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>8</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Left-join the total_logs feature onto data by (user_id, merchant_id); every row of data is\n",
    "# kept, and pairs without a matching row in total_logs get NaN.\n",
    "data = data.merge(total_logs, how=\"left\", on=[\"user_id\", \"merchant_id\"])\n",
    "data.head()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-24T11:29:56.318882100Z",
     "start_time": "2024-09-24T11:29:49.017036Z"
    }
   },
   "id": "216f675c529a3679",
   "execution_count": 15
  },
  {
   "cell_type": "code",
   "outputs": [],
   "source": [
    "# 用户浏览的商品的数目(unique_item_ids)\n",
    "unique_item_ids_tmp = user_log.groupby([user_log['user_id'],user_log['merchant_id'],user_log['item_id']]).count().reset_index()[['user_id','merchant_id']]"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-24T11:30:53.826204500Z",
     "start_time": "2024-09-24T11:30:00.098388600Z"
    }
   },
   "id": "90c231817110ead8",
   "execution_count": 16
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "data": {
      "text/plain": "   user_id  merchant_id\n0        1          471\n1        1          739\n2        1          925\n3        1         1019\n4        1         1156",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>user_id</th>\n      <th>merchant_id</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>1</td>\n      <td>471</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>1</td>\n      <td>739</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>1</td>\n      <td>925</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>1</td>\n      <td>1019</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>1</td>\n      <td>1156</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "unique_item_ids_tmp.head()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-24T11:31:02.583049800Z",
     "start_time": "2024-09-24T11:31:02.364903100Z"
    }
   },
   "id": "9624fd79706bbae3",
   "execution_count": 17
  },
  {
   "cell_type": "code",
   "outputs": [],
   "source": [
    "unique_item_ids_tmp['cnt'] = 1\n",
    "unique_item_ids = unique_item_ids_tmp.groupby([unique_item_ids_tmp[\"user_id\"],unique_item_ids_tmp[\"merchant_id\"]]).count().reset_index()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-24T11:31:13.437198400Z",
     "start_time": "2024-09-24T11:31:05.390912100Z"
    }
   },
   "id": "e64085029a4781d8",
   "execution_count": 18
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "data": {
      "text/plain": "   user_id  merchant_id  unique_item_ids\n0        1          471                1\n1        1          739                1\n2        1          925                1\n3        1         1019                1\n4        1         1156                1\n5        1         2245                4\n6        1         4026                1\n7        1         4177                1\n8        1         4335                1\n9        2          420               15",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>user_id</th>\n      <th>merchant_id</th>\n      <th>unique_item_ids</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>1</td>\n      <td>471</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>1</td>\n      <td>739</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>1</td>\n      <td>925</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>1</td>\n      <td>1019</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>1</td>\n      <td>1156</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>1</td>\n      <td>2245</td>\n      <td>4</td>\n    </tr>\n    <tr>\n      <th>6</th>\n      <td>1</td>\n      <td>4026</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>7</th>\n      <td>1</td>\n      <td>4177</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>8</th>\n      <td>1</td>\n      <td>4335</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>9</th>\n      <td>2</td>\n      <td>420</td>\n      <td>15</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "unique_item_ids.rename(columns={\"cnt\":\"unique_item_ids\"},inplace=True)\n",
    "unique_item_ids.head(10)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-24T11:31:14.848153200Z",
     "start_time": "2024-09-24T11:31:14.610993Z"
    }
   },
   "id": "f26a49d9cd2eef0a",
   "execution_count": 19
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "data": {
      "text/plain": "   user_id  merchant_id  label  age_range  gender  total_logs  unique_item_ids\n0    34176         3906      0        6.0     0.0          39               20\n1    34176          121      0        6.0     0.0          14                1\n2    34176         4356      1        6.0     0.0          18                2\n3    34176         2217      0        6.0     0.0           2                1\n4   230784         4818      0        0.0     0.0           8                1",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>user_id</th>\n      <th>merchant_id</th>\n      <th>label</th>\n      <th>age_range</th>\n      <th>gender</th>\n      <th>total_logs</th>\n      <th>unique_item_ids</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>34176</td>\n      <td>3906</td>\n      <td>0</td>\n      <td>6.0</td>\n      <td>0.0</td>\n      <td>39</td>\n      <td>20</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>34176</td>\n      <td>121</td>\n      <td>0</td>\n      <td>6.0</td>\n      <td>0.0</td>\n      <td>14</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>34176</td>\n      <td>4356</td>\n      <td>1</td>\n      <td>6.0</td>\n      <td>0.0</td>\n      <td>18</td>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>34176</td>\n      <td>2217</td>\n      <td>0</td>\n      <td>6.0</td>\n      <td>0.0</td>\n      <td>2</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>230784</td>\n      <td>4818</td>\n      <td>0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>8</td>\n      <td>1</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data = pd.merge(data,unique_item_ids,on=[\"user_id\",\"merchant_id\"],how=\"left\")\n",
    "data.head()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-24T11:31:25.733995Z",
     "start_time": "2024-09-24T11:31:17.606981300Z"
    }
   },
   "id": "998af52818e103cf",
   "execution_count": 20
  },
  {
   "cell_type": "code",
   "outputs": [],
   "source": [
    "# 浏览的商品的种类的数目(categories)\n",
    "categories = user_log.groupby([user_log[\"user_id\"],user_log[\"merchant_id\"],user_log[\"cat_id\"]]).count().reset_index()[[\"user_id\",\"merchant_id\",'item_id']]"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-24T11:32:04.792063500Z",
     "start_time": "2024-09-24T11:31:28.799027900Z"
    }
   },
   "id": "26bf1982420f3667",
   "execution_count": 21
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "data": {
      "text/plain": "   user_id  merchant_id  categories\n0        1          471           1\n1        1          739           1\n2        1          925           4\n3        1         1019          14\n4        1         1156           1\n5        1         2245           5\n6        1         4026           5\n7        1         4177           1\n8        1         4335           1\n9        2          420          18",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>user_id</th>\n      <th>merchant_id</th>\n      <th>categories</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>1</td>\n      <td>471</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>1</td>\n      <td>739</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>1</td>\n      <td>925</td>\n      <td>4</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>1</td>\n      <td>1019</td>\n      <td>14</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>1</td>\n      <td>1156</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>1</td>\n      <td>2245</td>\n      <td>5</td>\n    </tr>\n    <tr>\n      <th>6</th>\n      <td>1</td>\n      <td>4026</td>\n      <td>5</td>\n    </tr>\n    <tr>\n      <th>7</th>\n      <td>1</td>\n      <td>4177</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>8</th>\n      <td>1</td>\n      <td>4335</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>9</th>\n      <td>2</td>\n      <td>420</td>\n      <td>18</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "categories.rename(columns={\"item_id\":\"categories\"},inplace=True)\n",
    "categories.head(10)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-24T11:32:13.220785400Z",
     "start_time": "2024-09-24T11:32:13.030659300Z"
    }
   },
   "id": "e7556a835e678ed8",
   "execution_count": 22
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "data": {
      "text/plain": "   user_id  merchant_id  label  age_range  gender  total_logs  \\\n0    34176         3906      0        6.0     0.0          39   \n1    34176         3906      0        6.0     0.0          39   \n2    34176         3906      0        6.0     0.0          39   \n3    34176         3906      0        6.0     0.0          39   \n4    34176         3906      0        6.0     0.0          39   \n\n   unique_item_ids  categories  \n0               20           1  \n1               20           7  \n2               20          24  \n3               20           1  \n4               20           2  ",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>user_id</th>\n      <th>merchant_id</th>\n      <th>label</th>\n      <th>age_range</th>\n      <th>gender</th>\n      <th>total_logs</th>\n      <th>unique_item_ids</th>\n      <th>categories</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>34176</td>\n      <td>3906</td>\n      <td>0</td>\n      <td>6.0</td>\n      <td>0.0</td>\n      <td>39</td>\n      <td>20</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>34176</td>\n      <td>3906</td>\n      <td>0</td>\n      <td>6.0</td>\n      <td>0.0</td>\n      <td>39</td>\n      <td>20</td>\n      <td>7</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>34176</td>\n      <td>3906</td>\n      <td>0</td>\n      <td>6.0</td>\n      <td>0.0</td>\n      <td>39</td>\n      <td>20</td>\n      <td>24</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>34176</td>\n      <td>3906</td>\n      <td>0</td>\n      <td>6.0</td>\n      <td>0.0</td>\n      <td>39</td>\n      <td>20</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>34176</td>\n      <td>3906</td>\n      <td>0</td>\n      <td>6.0</td>\n      <td>0.0</td>\n      <td>39</td>\n      <td>20</td>\n      <td>2</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data = pd.merge(data,categories,on=['user_id','merchant_id'],how='left')\n",
    "data.head()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-24T11:32:24.861509200Z",
     "start_time": "2024-09-24T11:32:15.975614900Z"
    }
   },
   "id": "c60a095c15849cac",
   "execution_count": 23
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "data": {
      "text/plain": "   user_id  merchant_id  time_stamp\n0        1          471        1111\n1        1          739        1018\n2        1          925        1011\n3        1         1019        1111\n4        1         1156        1111\n5        1         2245        1009\n6        1         4026        1018\n7        1         4026        1021\n8        1         4177        1018\n9        1         4335        1111",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>user_id</th>\n      <th>merchant_id</th>\n      <th>time_stamp</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>1</td>\n      <td>471</td>\n      <td>1111</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>1</td>\n      <td>739</td>\n      <td>1018</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>1</td>\n      <td>925</td>\n      <td>1011</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>1</td>\n      <td>1019</td>\n      <td>1111</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>1</td>\n      <td>1156</td>\n      <td>1111</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>1</td>\n      <td>2245</td>\n      <td>1009</td>\n    </tr>\n    <tr>\n      <th>6</th>\n      <td>1</td>\n      <td>4026</td>\n      <td>1018</td>\n    </tr>\n    <tr>\n      <th>7</th>\n      <td>1</td>\n      <td>4026</td>\n      <td>1021</td>\n    </tr>\n    <tr>\n      <th>8</th>\n      <td>1</td>\n      <td>4177</td>\n      <td>1018</td>\n    </tr>\n    <tr>\n      <th>9</th>\n      <td>1</td>\n      <td>4335</td>\n      <td>1111</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 用户浏览的天数(browse_days)\n",
    "browse_days_temp = user_log.groupby([user_log[\"user_id\"],user_log[\"merchant_id\"],user_log[\"time_stamp\"]]).count().reset_index()[[\"user_id\",\"merchant_id\",\"time_stamp\"]]\n",
    "browse_days_temp.head(10)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-24T11:33:05.222390800Z",
     "start_time": "2024-09-24T11:32:28.185714800Z"
    }
   },
   "id": "f86771ad36d1c21c",
   "execution_count": 24
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "data": {
      "text/plain": "   user_id  merchant_id  time_stamp\n0        1          471           1\n1        1          739           1\n2        1          925           1\n3        1         1019           1\n4        1         1156           1",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>user_id</th>\n      <th>merchant_id</th>\n      <th>time_stamp</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>1</td>\n      <td>471</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>1</td>\n      <td>739</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>1</td>\n      <td>925</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>1</td>\n      <td>1019</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>1</td>\n      <td>1156</td>\n      <td>1</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "browse_days_temp1 = browse_days_temp.groupby([browse_days_temp[\"user_id\"],browse_days_temp[\"merchant_id\"]]).count().reset_index()\n",
    "browse_days_temp1.head()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-24T11:33:16.670612500Z",
     "start_time": "2024-09-24T11:33:08.690317500Z"
    }
   },
   "id": "e13cd3d6b52b355c",
   "execution_count": 25
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "data": {
      "text/plain": "   user_id  merchant_id  browse_days\n0        1          471            1\n1        1          739            1\n2        1          925            1\n3        1         1019            1\n4        1         1156            1\n5        1         2245            1\n6        1         4026            2\n7        1         4177            1\n8        1         4335            1\n9        2          420            1",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>user_id</th>\n      <th>merchant_id</th>\n      <th>browse_days</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>1</td>\n      <td>471</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>1</td>\n      <td>739</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>1</td>\n      <td>925</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>1</td>\n      <td>1019</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>1</td>\n      <td>1156</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>1</td>\n      <td>2245</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>6</th>\n      <td>1</td>\n      <td>4026</td>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>7</th>\n      <td>1</td>\n      <td>4177</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>8</th>\n      <td>1</td>\n      <td>4335</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>9</th>\n      <td>2</td>\n      <td>420</td>\n      <td>1</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "browse_days_temp1.rename(columns={\"time_stamp\":\"browse_days\"},inplace=True)\n",
    "browse_days_temp1.head(10)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-24T11:33:20.226361500Z",
     "start_time": "2024-09-24T11:33:20.072871900Z"
    }
   },
   "id": "95baec721aa0b64f",
   "execution_count": 26
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "data": {
      "text/plain": "   user_id  merchant_id  label  age_range  gender  total_logs  \\\n0    34176         3906      0        6.0     0.0          39   \n1    34176         3906      0        6.0     0.0          39   \n2    34176         3906      0        6.0     0.0          39   \n3    34176         3906      0        6.0     0.0          39   \n4    34176         3906      0        6.0     0.0          39   \n5    34176         3906      0        6.0     0.0          39   \n6    34176          121      0        6.0     0.0          14   \n7    34176         4356      1        6.0     0.0          18   \n8    34176         2217      0        6.0     0.0           2   \n9   230784         4818      0        0.0     0.0           8   \n\n   unique_item_ids  categories  browse_days  \n0               20           1            9  \n1               20           7            9  \n2               20          24            9  \n3               20           1            9  \n4               20           2            9  \n5               20           4            9  \n6                1          14            3  \n7                2          18            2  \n8                1           2            1  \n9                1           8            3  ",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>user_id</th>\n      <th>merchant_id</th>\n      <th>label</th>\n      <th>age_range</th>\n      <th>gender</th>\n      <th>total_logs</th>\n      <th>unique_item_ids</th>\n      <th>categories</th>\n      <th>browse_days</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>34176</td>\n      <td>3906</td>\n      <td>0</td>\n      <td>6.0</td>\n      <td>0.0</td>\n      <td>39</td>\n      <td>20</td>\n      <td>1</td>\n      <td>9</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>34176</td>\n      <td>3906</td>\n      <td>0</td>\n      <td>6.0</td>\n      <td>0.0</td>\n      <td>39</td>\n      <td>20</td>\n      <td>7</td>\n      <td>9</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>34176</td>\n      <td>3906</td>\n      <td>0</td>\n      <td>6.0</td>\n      <td>0.0</td>\n      <td>39</td>\n      <td>20</td>\n      <td>24</td>\n      <td>9</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>34176</td>\n      <td>3906</td>\n      <td>0</td>\n      <td>6.0</td>\n      <td>0.0</td>\n      <td>39</td>\n      <td>20</td>\n      <td>1</td>\n      <td>9</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>34176</td>\n      <td>3906</td>\n      <td>0</td>\n      <td>6.0</td>\n      <td>0.0</td>\n      <td>39</td>\n      <td>20</td>\n      <td>2</td>\n      <td>9</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>34176</td>\n      <td>3906</td>\n      <td>0</td>\n      <td>6.0</td>\n      <td>0.0</td>\n      <td>39</td>\n      <td>20</td>\n      <td>4</td>\n      <td>9</td>\n    </tr>\n    <tr>\n      <th>6</th>\n      <td>34176</td>\n      
<td>121</td>\n      <td>0</td>\n      <td>6.0</td>\n      <td>0.0</td>\n      <td>14</td>\n      <td>1</td>\n      <td>14</td>\n      <td>3</td>\n    </tr>\n    <tr>\n      <th>7</th>\n      <td>34176</td>\n      <td>4356</td>\n      <td>1</td>\n      <td>6.0</td>\n      <td>0.0</td>\n      <td>18</td>\n      <td>2</td>\n      <td>18</td>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>8</th>\n      <td>34176</td>\n      <td>2217</td>\n      <td>0</td>\n      <td>6.0</td>\n      <td>0.0</td>\n      <td>2</td>\n      <td>1</td>\n      <td>2</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>9</th>\n      <td>230784</td>\n      <td>4818</td>\n      <td>0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>8</td>\n      <td>1</td>\n      <td>8</td>\n      <td>3</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data = pd.merge(data,browse_days_temp1,on=[\"user_id\",\"merchant_id\"],how=\"left\")\n",
    "data.head(10)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-24T11:33:31.176625500Z",
     "start_time": "2024-09-24T11:33:23.223349600Z"
    }
   },
   "id": "2203df1d3ed25168",
   "execution_count": 27
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "data": {
      "text/plain": "   user_id  merchant_id  action_type  item_id\n0        1          471            0        1\n1        1          739            0        1\n2        1          925            0        3\n3        1          925            2        1\n4        1         1019            0       10",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>user_id</th>\n      <th>merchant_id</th>\n      <th>action_type</th>\n      <th>item_id</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>1</td>\n      <td>471</td>\n      <td>0</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>1</td>\n      <td>739</td>\n      <td>0</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>1</td>\n      <td>925</td>\n      <td>0</td>\n      <td>3</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>1</td>\n      <td>925</td>\n      <td>2</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>1</td>\n      <td>1019</td>\n      <td>0</td>\n      <td>10</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "one_clicks_temp = user_log.groupby([user_log[\"user_id\"], user_log[\"merchant_id\"], user_log[\"action_type\"]]).count().reset_index()[\n",
    "    [\"user_id\", \"merchant_id\", \"action_type\", \"item_id\"]]\n",
    "one_clicks_temp.head()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-24T11:34:12.507810600Z",
     "start_time": "2024-09-24T11:33:35.626731700Z"
    }
   },
   "id": "6f2b8f248c76e9bd",
   "execution_count": 28
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "data": {
      "text/plain": "   user_id  merchant_id  action_type  times\n0        1          471            0      1\n1        1          739            0      1\n2        1          925            0      3\n3        1          925            2      1\n4        1         1019            0     10",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>user_id</th>\n      <th>merchant_id</th>\n      <th>action_type</th>\n      <th>times</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>1</td>\n      <td>471</td>\n      <td>0</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>1</td>\n      <td>739</td>\n      <td>0</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>1</td>\n      <td>925</td>\n      <td>0</td>\n      <td>3</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>1</td>\n      <td>925</td>\n      <td>2</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>1</td>\n      <td>1019</td>\n      <td>0</td>\n      <td>10</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "one_clicks_temp.rename(columns={\"item_id\": \"times\"}, inplace=True)\n",
    "one_clicks_temp.head()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-24T11:34:16.080181700Z",
     "start_time": "2024-09-24T11:34:15.857034400Z"
    }
   },
   "id": "8e92ea785aeb93e2",
   "execution_count": 29
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "data": {
      "text/plain": "   user_id  merchant_id  action_type  times  one_clicks\n0        1          471            0      1           1\n1        1          739            0      1           1\n2        1          925            0      3           3\n3        1          925            2      1           0\n4        1         1019            0     10          10",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>user_id</th>\n      <th>merchant_id</th>\n      <th>action_type</th>\n      <th>times</th>\n      <th>one_clicks</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>1</td>\n      <td>471</td>\n      <td>0</td>\n      <td>1</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>1</td>\n      <td>739</td>\n      <td>0</td>\n      <td>1</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>1</td>\n      <td>925</td>\n      <td>0</td>\n      <td>3</td>\n      <td>3</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>1</td>\n      <td>925</td>\n      <td>2</td>\n      <td>1</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>1</td>\n      <td>1019</td>\n      <td>0</td>\n      <td>10</td>\n      <td>10</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "one_clicks_temp[\"one_clicks\"] = one_clicks_temp[\"action_type\"] == 0\n",
    "one_clicks_temp[\"one_clicks\"] = one_clicks_temp[\"one_clicks\"] * one_clicks_temp[\"times\"]\n",
    "one_clicks_temp.head()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-24T11:34:19.740610Z",
     "start_time": "2024-09-24T11:34:19.125201300Z"
    }
   },
   "id": "7985d713b1f8fd8e",
   "execution_count": 30
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "data": {
      "text/plain": "   user_id  merchant_id  action_type  times  one_clicks  shopping_carts\n0        1          471            0      1           1               0\n1        1          739            0      1           1               0\n2        1          925            0      3           3               0\n3        1          925            2      1           0               0\n4        1         1019            0     10          10               0",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>user_id</th>\n      <th>merchant_id</th>\n      <th>action_type</th>\n      <th>times</th>\n      <th>one_clicks</th>\n      <th>shopping_carts</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>1</td>\n      <td>471</td>\n      <td>0</td>\n      <td>1</td>\n      <td>1</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>1</td>\n      <td>739</td>\n      <td>0</td>\n      <td>1</td>\n      <td>1</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>1</td>\n      <td>925</td>\n      <td>0</td>\n      <td>3</td>\n      <td>3</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>1</td>\n      <td>925</td>\n      <td>2</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>1</td>\n      <td>1019</td>\n      <td>0</td>\n      <td>10</td>\n      <td>10</td>\n      <td>0</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# shopping_carts: the row's `times` when action_type == 1, otherwise 0.\n",
    "one_clicks_temp[\"shopping_carts\"] = (\n",
    "    one_clicks_temp[\"action_type\"].eq(1).mul(one_clicks_temp[\"times\"])\n",
    ")\n",
    "one_clicks_temp.head()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-24T11:34:22.783628700Z",
     "start_time": "2024-09-24T11:34:22.029129300Z"
    }
   },
   "id": "3984f4d7754a0aae",
   "execution_count": 31
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "data": {
      "text/plain": "   user_id  merchant_id  action_type  times  one_clicks  shopping_carts  \\\n0        1          471            0      1           1               0   \n1        1          739            0      1           1               0   \n2        1          925            0      3           3               0   \n3        1          925            2      1           0               0   \n4        1         1019            0     10          10               0   \n\n   purchase_times  \n0               0  \n1               0  \n2               0  \n3               1  \n4               0  ",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>user_id</th>\n      <th>merchant_id</th>\n      <th>action_type</th>\n      <th>times</th>\n      <th>one_clicks</th>\n      <th>shopping_carts</th>\n      <th>purchase_times</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>1</td>\n      <td>471</td>\n      <td>0</td>\n      <td>1</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>1</td>\n      <td>739</td>\n      <td>0</td>\n      <td>1</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>1</td>\n      <td>925</td>\n      <td>0</td>\n      <td>3</td>\n      <td>3</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>1</td>\n      <td>925</td>\n      <td>2</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>1</td>\n      <td>1019</td>\n      <td>0</td>\n      <td>10</td>\n      <td>10</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# purchase_times: the row's `times` when action_type == 2, otherwise 0.\n",
    "one_clicks_temp[\"purchase_times\"] = (\n",
    "    one_clicks_temp[\"action_type\"].eq(2).mul(one_clicks_temp[\"times\"])\n",
    ")\n",
    "one_clicks_temp.head()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-24T11:34:26.266939900Z",
     "start_time": "2024-09-24T11:34:25.812638900Z"
    }
   },
   "id": "f6800cf1aede1a46",
   "execution_count": 32
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "data": {
      "text/plain": "   user_id  merchant_id  action_type  times  one_clicks  shopping_carts  \\\n0        1          471            0      1           1               0   \n1        1          739            0      1           1               0   \n2        1          925            0      3           3               0   \n3        1          925            2      1           0               0   \n4        1         1019            0     10          10               0   \n\n   purchase_times  favourite_times  \n0               0                0  \n1               0                0  \n2               0                0  \n3               1                0  \n4               0                0  ",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>user_id</th>\n      <th>merchant_id</th>\n      <th>action_type</th>\n      <th>times</th>\n      <th>one_clicks</th>\n      <th>shopping_carts</th>\n      <th>purchase_times</th>\n      <th>favourite_times</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>1</td>\n      <td>471</td>\n      <td>0</td>\n      <td>1</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>1</td>\n      <td>739</td>\n      <td>0</td>\n      <td>1</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>1</td>\n      <td>925</td>\n      <td>0</td>\n      <td>3</td>\n      <td>3</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>1</td>\n      <td>925</td>\n      <td>2</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>1</td>\n      <td>1019</td>\n      <td>0</td>\n      <td>10</td>\n      <td>10</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# favourite_times: the row's `times` when action_type == 3, otherwise 0.\n",
    "one_clicks_temp[\"favourite_times\"] = (\n",
    "    one_clicks_temp[\"action_type\"].eq(3).mul(one_clicks_temp[\"times\"])\n",
    ")\n",
    "one_clicks_temp.head()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-24T11:34:29.600151500Z",
     "start_time": "2024-09-24T11:34:29.109826600Z"
    }
   },
   "id": "779acbd7e7c9d677",
   "execution_count": 33
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "data": {
      "text/plain": "   user_id  merchant_id  action_type  times  one_clicks  shopping_carts  \\\n0        1          471            0      1           1               0   \n1        1          739            0      1           1               0   \n2        1          925            2      4           3               0   \n3        1         1019            2     14          10               0   \n4        1         1156            0      1           1               0   \n5        1         2245            0      5           5               0   \n6        1         4026            2      5           4               0   \n7        1         4177            0      1           1               0   \n8        1         4335            0      1           1               0   \n9        2          420            2     26          23               0   \n\n   purchase_times  favourite_times  \n0               0                0  \n1               0                0  \n2               1                0  \n3               4                0  \n4               0                0  \n5               0                0  \n6               1                0  \n7               0                0  \n8               0                0  \n9               3                0  ",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>user_id</th>\n      <th>merchant_id</th>\n      <th>action_type</th>\n      <th>times</th>\n      <th>one_clicks</th>\n      <th>shopping_carts</th>\n      <th>purchase_times</th>\n      <th>favourite_times</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>1</td>\n      <td>471</td>\n      <td>0</td>\n      <td>1</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>1</td>\n      <td>739</td>\n      <td>0</td>\n      <td>1</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>1</td>\n      <td>925</td>\n      <td>2</td>\n      <td>4</td>\n      <td>3</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>1</td>\n      <td>1019</td>\n      <td>2</td>\n      <td>14</td>\n      <td>10</td>\n      <td>0</td>\n      <td>4</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>1</td>\n      <td>1156</td>\n      <td>0</td>\n      <td>1</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>1</td>\n      <td>2245</td>\n      <td>0</td>\n      <td>5</td>\n      <td>5</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>6</th>\n      <td>1</td>\n      <td>4026</td>\n      <td>2</td>\n      <td>5</td>\n      <td>4</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>7</th>\n      <td>1</td>\n      
<td>4177</td>\n      <td>0</td>\n      <td>1</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>8</th>\n      <td>1</td>\n      <td>4335</td>\n      <td>0</td>\n      <td>1</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>9</th>\n      <td>2</td>\n      <td>420</td>\n      <td>2</td>\n      <td>26</td>\n      <td>23</td>\n      <td>0</td>\n      <td>3</td>\n      <td>0</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Collapse to one row per (user_id, merchant_id) by summing the remaining columns\n",
    "# (action_type and times are summed as well); the keys stay as ordinary columns.\n",
    "four_features = one_clicks_temp.groupby(\n",
    "    [\"user_id\", \"merchant_id\"], as_index=False\n",
    ").sum()\n",
    "four_features.head(10)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-24T11:34:44.962074500Z",
     "start_time": "2024-09-24T11:34:32.295940200Z"
    }
   },
   "id": "57816616c2e9b17e",
   "execution_count": 34
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "data": {
      "text/plain": "   user_id  merchant_id  one_clicks  shopping_carts  purchase_times  \\\n0        1          471           1               0               0   \n1        1          739           1               0               0   \n2        1          925           3               0               1   \n3        1         1019          10               0               4   \n4        1         1156           1               0               0   \n\n   favourite_times  \n0                0  \n1                0  \n2                0  \n3                0  \n4                0  ",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>user_id</th>\n      <th>merchant_id</th>\n      <th>one_clicks</th>\n      <th>shopping_carts</th>\n      <th>purchase_times</th>\n      <th>favourite_times</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>1</td>\n      <td>471</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>1</td>\n      <td>739</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>1</td>\n      <td>925</td>\n      <td>3</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>1</td>\n      <td>1019</td>\n      <td>10</td>\n      <td>0</td>\n      <td>4</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>1</td>\n      <td>1156</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Keep only the two keys and the four per-action counts.\n",
    "four_features = four_features.drop(columns=[\"action_type\", \"times\"])\n",
    "four_features.head()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-24T11:34:54.954881200Z",
     "start_time": "2024-09-24T11:34:54.159828200Z"
    }
   },
   "id": "8a95bb7533e93425",
   "execution_count": 35
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "data": {
      "text/plain": "    user_id  merchant_id  label  age_range  gender  total_logs  \\\n0     34176         3906      0        6.0     0.0          39   \n1     34176         3906      0        6.0     0.0          39   \n2     34176         3906      0        6.0     0.0          39   \n3     34176         3906      0        6.0     0.0          39   \n4     34176         3906      0        6.0     0.0          39   \n5     34176         3906      0        6.0     0.0          39   \n6     34176          121      0        6.0     0.0          14   \n7     34176         4356      1        6.0     0.0          18   \n8     34176         2217      0        6.0     0.0           2   \n9    230784         4818      0        0.0     0.0           8   \n10   362112         2618      0        4.0     1.0           1   \n11    34944         2051      0        5.0     0.0           3   \n12   231552         3828      1        5.0     0.0          83   \n13   231552         3828      1        5.0     0.0          83   \n14   231552         3828      1        5.0     0.0          83   \n15   231552         3828      1        5.0     0.0          83   \n16   231552         3828      1        5.0     0.0          83   \n17   231552         3828      1        5.0     0.0          83   \n18   231552         3828      1        5.0     0.0          83   \n19   231552         3828      1        5.0     0.0          83   \n20   231552         3828      1        5.0     0.0          83   \n21   231552         3828      1        5.0     0.0          83   \n22   231552         3828      1        5.0     0.0          83   \n23   231552         3828      1        5.0     0.0          83   \n24   231552         3828      1        5.0     0.0          83   \n25   231552         3828      1        5.0     0.0          83   \n26   231552         3828      1        5.0     0.0          83   \n27   231552         2124      0        5.0     0.0           7   \n28   232320         1168      0    
    4.0     1.0           4   \n29   232320         4270      0        4.0     1.0          22   \n\n    unique_item_ids  categories  browse_days  one_clicks  shopping_carts  \\\n0                20           1            9          36               0   \n1                20           7            9          36               0   \n2                20          24            9          36               0   \n3                20           1            9          36               0   \n4                20           2            9          36               0   \n5                20           4            9          36               0   \n6                 1          14            3          13               0   \n7                 2          18            2          12               0   \n8                 1           2            1           1               0   \n9                 1           8            3           7               0   \n10                1           1            1           0               0   \n11                2           3            1           2               0   \n12               48           1            3          78               0   \n13               48           2            3          78               0   \n14               48           8            3          78               0   \n15               48           3            3          78               0   \n16               48           4            3          78               0   \n17               48           2            3          78               0   \n18               48           3            3          78               0   \n19               48           8            3          78               0   \n20               48           2            3          78               0   \n21               48           2            3          78               0   \n22               48           5            3          78               0   \n23               48           1            3      
    78               0   \n24               48          22            3          78               0   \n25               48          11            3          78               0   \n26               48           9            3          78               0   \n27                4           7            1           6               0   \n28                1           4            2           2               0   \n29               13           5            2          13               0   \n\n    purchase_times  favourite_times  \n0                1                2  \n1                1                2  \n2                1                2  \n3                1                2  \n4                1                2  \n5                1                2  \n6                1                0  \n7                6                0  \n8                1                0  \n9                1                0  \n10               1                0  \n11               1                0  \n12               5                0  \n13               5                0  \n14               5                0  \n15               5                0  \n16               5                0  \n17               5                0  \n18               5                0  \n19               5                0  \n20               5                0  \n21               5                0  \n22               5                0  \n23               5                0  \n24               5                0  \n25               5                0  \n26               5                0  \n27               1                0  \n28               1                1  \n29               2                7  ",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>user_id</th>\n      <th>merchant_id</th>\n      <th>label</th>\n      <th>age_range</th>\n      <th>gender</th>\n      <th>total_logs</th>\n      <th>unique_item_ids</th>\n      <th>categories</th>\n      <th>browse_days</th>\n      <th>one_clicks</th>\n      <th>shopping_carts</th>\n      <th>purchase_times</th>\n      <th>favourite_times</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>34176</td>\n      <td>3906</td>\n      <td>0</td>\n      <td>6.0</td>\n      <td>0.0</td>\n      <td>39</td>\n      <td>20</td>\n      <td>1</td>\n      <td>9</td>\n      <td>36</td>\n      <td>0</td>\n      <td>1</td>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>34176</td>\n      <td>3906</td>\n      <td>0</td>\n      <td>6.0</td>\n      <td>0.0</td>\n      <td>39</td>\n      <td>20</td>\n      <td>7</td>\n      <td>9</td>\n      <td>36</td>\n      <td>0</td>\n      <td>1</td>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>34176</td>\n      <td>3906</td>\n      <td>0</td>\n      <td>6.0</td>\n      <td>0.0</td>\n      <td>39</td>\n      <td>20</td>\n      <td>24</td>\n      <td>9</td>\n      <td>36</td>\n      <td>0</td>\n      <td>1</td>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>34176</td>\n      <td>3906</td>\n      <td>0</td>\n      <td>6.0</td>\n      <td>0.0</td>\n      <td>39</td>\n      <td>20</td>\n      <td>1</td>\n      <td>9</td>\n      <td>36</td>\n      <td>0</td>\n      <td>1</td>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>34176</td>\n      <td>3906</td>\n      
<td>0</td>\n      <td>6.0</td>\n      <td>0.0</td>\n      <td>39</td>\n      <td>20</td>\n      <td>2</td>\n      <td>9</td>\n      <td>36</td>\n      <td>0</td>\n      <td>1</td>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>34176</td>\n      <td>3906</td>\n      <td>0</td>\n      <td>6.0</td>\n      <td>0.0</td>\n      <td>39</td>\n      <td>20</td>\n      <td>4</td>\n      <td>9</td>\n      <td>36</td>\n      <td>0</td>\n      <td>1</td>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>6</th>\n      <td>34176</td>\n      <td>121</td>\n      <td>0</td>\n      <td>6.0</td>\n      <td>0.0</td>\n      <td>14</td>\n      <td>1</td>\n      <td>14</td>\n      <td>3</td>\n      <td>13</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>7</th>\n      <td>34176</td>\n      <td>4356</td>\n      <td>1</td>\n      <td>6.0</td>\n      <td>0.0</td>\n      <td>18</td>\n      <td>2</td>\n      <td>18</td>\n      <td>2</td>\n      <td>12</td>\n      <td>0</td>\n      <td>6</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>8</th>\n      <td>34176</td>\n      <td>2217</td>\n      <td>0</td>\n      <td>6.0</td>\n      <td>0.0</td>\n      <td>2</td>\n      <td>1</td>\n      <td>2</td>\n      <td>1</td>\n      <td>1</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>9</th>\n      <td>230784</td>\n      <td>4818</td>\n      <td>0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>8</td>\n      <td>1</td>\n      <td>8</td>\n      <td>3</td>\n      <td>7</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>10</th>\n      <td>362112</td>\n      <td>2618</td>\n      <td>0</td>\n      <td>4.0</td>\n      <td>1.0</td>\n      <td>1</td>\n      <td>1</td>\n      <td>1</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>11</th>\n      <td>34944</td>\n      <td>2051</td>\n   
   <td>0</td>\n      <td>5.0</td>\n      <td>0.0</td>\n      <td>3</td>\n      <td>2</td>\n      <td>3</td>\n      <td>1</td>\n      <td>2</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>12</th>\n      <td>231552</td>\n      <td>3828</td>\n      <td>1</td>\n      <td>5.0</td>\n      <td>0.0</td>\n      <td>83</td>\n      <td>48</td>\n      <td>1</td>\n      <td>3</td>\n      <td>78</td>\n      <td>0</td>\n      <td>5</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>13</th>\n      <td>231552</td>\n      <td>3828</td>\n      <td>1</td>\n      <td>5.0</td>\n      <td>0.0</td>\n      <td>83</td>\n      <td>48</td>\n      <td>2</td>\n      <td>3</td>\n      <td>78</td>\n      <td>0</td>\n      <td>5</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>14</th>\n      <td>231552</td>\n      <td>3828</td>\n      <td>1</td>\n      <td>5.0</td>\n      <td>0.0</td>\n      <td>83</td>\n      <td>48</td>\n      <td>8</td>\n      <td>3</td>\n      <td>78</td>\n      <td>0</td>\n      <td>5</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>15</th>\n      <td>231552</td>\n      <td>3828</td>\n      <td>1</td>\n      <td>5.0</td>\n      <td>0.0</td>\n      <td>83</td>\n      <td>48</td>\n      <td>3</td>\n      <td>3</td>\n      <td>78</td>\n      <td>0</td>\n      <td>5</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>16</th>\n      <td>231552</td>\n      <td>3828</td>\n      <td>1</td>\n      <td>5.0</td>\n      <td>0.0</td>\n      <td>83</td>\n      <td>48</td>\n      <td>4</td>\n      <td>3</td>\n      <td>78</td>\n      <td>0</td>\n      <td>5</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>17</th>\n      <td>231552</td>\n      <td>3828</td>\n      <td>1</td>\n      <td>5.0</td>\n      <td>0.0</td>\n      <td>83</td>\n      <td>48</td>\n      <td>2</td>\n      <td>3</td>\n      <td>78</td>\n      <td>0</td>\n      <td>5</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>18</th>\n      <td>231552</td>\n    
  <td>3828</td>\n      <td>1</td>\n      <td>5.0</td>\n      <td>0.0</td>\n      <td>83</td>\n      <td>48</td>\n      <td>3</td>\n      <td>3</td>\n      <td>78</td>\n      <td>0</td>\n      <td>5</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>19</th>\n      <td>231552</td>\n      <td>3828</td>\n      <td>1</td>\n      <td>5.0</td>\n      <td>0.0</td>\n      <td>83</td>\n      <td>48</td>\n      <td>8</td>\n      <td>3</td>\n      <td>78</td>\n      <td>0</td>\n      <td>5</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>20</th>\n      <td>231552</td>\n      <td>3828</td>\n      <td>1</td>\n      <td>5.0</td>\n      <td>0.0</td>\n      <td>83</td>\n      <td>48</td>\n      <td>2</td>\n      <td>3</td>\n      <td>78</td>\n      <td>0</td>\n      <td>5</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>21</th>\n      <td>231552</td>\n      <td>3828</td>\n      <td>1</td>\n      <td>5.0</td>\n      <td>0.0</td>\n      <td>83</td>\n      <td>48</td>\n      <td>2</td>\n      <td>3</td>\n      <td>78</td>\n      <td>0</td>\n      <td>5</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>22</th>\n      <td>231552</td>\n      <td>3828</td>\n      <td>1</td>\n      <td>5.0</td>\n      <td>0.0</td>\n      <td>83</td>\n      <td>48</td>\n      <td>5</td>\n      <td>3</td>\n      <td>78</td>\n      <td>0</td>\n      <td>5</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>23</th>\n      <td>231552</td>\n      <td>3828</td>\n      <td>1</td>\n      <td>5.0</td>\n      <td>0.0</td>\n      <td>83</td>\n      <td>48</td>\n      <td>1</td>\n      <td>3</td>\n      <td>78</td>\n      <td>0</td>\n      <td>5</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>24</th>\n      <td>231552</td>\n      <td>3828</td>\n      <td>1</td>\n      <td>5.0</td>\n      <td>0.0</td>\n      <td>83</td>\n      <td>48</td>\n      <td>22</td>\n      <td>3</td>\n      <td>78</td>\n      <td>0</td>\n      <td>5</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>25</th>\n   
   <td>231552</td>\n      <td>3828</td>\n      <td>1</td>\n      <td>5.0</td>\n      <td>0.0</td>\n      <td>83</td>\n      <td>48</td>\n      <td>11</td>\n      <td>3</td>\n      <td>78</td>\n      <td>0</td>\n      <td>5</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>26</th>\n      <td>231552</td>\n      <td>3828</td>\n      <td>1</td>\n      <td>5.0</td>\n      <td>0.0</td>\n      <td>83</td>\n      <td>48</td>\n      <td>9</td>\n      <td>3</td>\n      <td>78</td>\n      <td>0</td>\n      <td>5</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>27</th>\n      <td>231552</td>\n      <td>2124</td>\n      <td>0</td>\n      <td>5.0</td>\n      <td>0.0</td>\n      <td>7</td>\n      <td>4</td>\n      <td>7</td>\n      <td>1</td>\n      <td>6</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>28</th>\n      <td>232320</td>\n      <td>1168</td>\n      <td>0</td>\n      <td>4.0</td>\n      <td>1.0</td>\n      <td>4</td>\n      <td>1</td>\n      <td>4</td>\n      <td>2</td>\n      <td>2</td>\n      <td>0</td>\n      <td>1</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>29</th>\n      <td>232320</td>\n      <td>4270</td>\n      <td>0</td>\n      <td>4.0</td>\n      <td>1.0</td>\n      <td>22</td>\n      <td>13</td>\n      <td>5</td>\n      <td>2</td>\n      <td>13</td>\n      <td>0</td>\n      <td>2</td>\n      <td>7</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Left join: every row of `data` is kept; pairs missing from four_features get NaN counts.\n",
    "data = data.merge(four_features, how=\"left\", on=[\"user_id\", \"merchant_id\"])\n",
    "data.head(30)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-24T11:35:05.616116800Z",
     "start_time": "2024-09-24T11:34:57.310967100Z"
    }
   },
   "id": "27592b8c5d006827",
   "execution_count": 36
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "data": {
      "text/plain": "label\nnan    492617\n0.0    448608\n1.0     41953\nName: count, dtype: int64"
     },
     "execution_count": 67,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Distribution of the target column.\n",
    "data[\"label\"].value_counts()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-23T12:36:22.836798300Z",
     "start_time": "2024-09-23T12:36:22.634666900Z"
    }
   },
   "id": "92134a1df9e4dcf8",
   "execution_count": 67
  },
  {
   "cell_type": "code",
   "outputs": [],
   "source": [
    "# Draw a reproducible random subsample of the feature table.\n",
    "sample_fraction = 0.2  # fraction of rows to keep\n",
    "df_train = data.sample(random_state=42, frac=sample_fraction)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-24T11:35:09.634817100Z",
     "start_time": "2024-09-24T11:35:09.401662800Z"
    }
   },
   "id": "f76c05cbfd4d4472",
   "execution_count": 37
  },
  {
   "cell_type": "code",
   "outputs": [],
   "source": [],
   "metadata": {
    "collapsed": false
   },
   "id": "9aae290703121451"
  },
  {
   "cell_type": "code",
   "outputs": [],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "# Separate the target from the feature columns.\n",
    "X = df_train.drop(columns=\"label\")\n",
    "y = df_train[\"label\"]\n",
    "\n",
    "# Hold out 30% for validation. stratify=y keeps the positive/negative ratio identical in both\n",
    "# splits, which matters because the label is heavily imbalanced.\n",
    "# NOTE(review): `data` appears to contain repeated (user_id, merchant_id) rows, so a row-level\n",
    "# split may place near-identical rows on both sides -- consider a group-aware split.\n",
    "X_train, X_val, y_train, y_val = train_test_split(\n",
    "    X, y, test_size=0.3, random_state=42, stratify=y\n",
    ")"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-24T11:35:40.015289300Z",
     "start_time": "2024-09-24T11:35:36.571116100Z"
    }
   },
   "id": "231aba80b2fab444",
   "execution_count": 38
  },
  {
   "cell_type": "code",
   "outputs": [],
   "source": [
    "from sklearn.ensemble import RandomForestClassifier\n",
    "from sklearn.metrics import roc_auc_score"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-24T11:35:47.364453900Z",
     "start_time": "2024-09-24T11:35:44.271248100Z"
    }
   },
   "id": "a1d1ebf5db12cef1",
   "execution_count": 39
  },
  {
   "cell_type": "code",
   "outputs": [],
   "source": [
    "# 在不设置 class_weight 的情况下，随机森林默认给予所有样本相同的权重。如果数据集不平衡，模型可能会偏向于多数类。 设置 class_weight='balance'：这会根据类别频率自动调整权重，使得模型在训练时更加关注少数类。具体来说，权重是 n_samples / (n_classes * np.bincount(y))，其中 n_samples 是样本总数，n_classes 是类别数，np.bincount(y) 是每个类别的样本数。\n",
    "model = RandomForestClassifier(n_estimators=70,max_depth=11, random_state=42,class_weight='balanced')\n",
    "model.fit(X_train,y_train)\n",
    "y_pred=model.predict(X_val)\n",
    "y_proba = model.predict_proba(X_val)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-24T11:35:57.912589200Z",
     "start_time": "2024-09-24T11:35:49.259848Z"
    }
   },
   "id": "d678f0a05dc415b3",
   "execution_count": 40
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "验证集auc:  0.6485098567952658\n"
     ]
    }
   ],
   "source": [
    "auc = roc_auc_score(y_val,y_pred)\n",
    "print('验证集auc: ',auc)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-24T11:36:02.915091500Z",
     "start_time": "2024-09-24T11:36:02.784006100Z"
    }
   },
   "id": "afc91b7279bb790f",
   "execution_count": 41
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "训练集auc:  0.7536871220490418\n"
     ]
    }
   ],
   "source": [
    "y_preds=model.predict(X_train)\n",
    "auc = roc_auc_score(y_train,y_preds)\n",
    "print('训练集auc: ',auc)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-24T11:36:06.573426500Z",
     "start_time": "2024-09-24T11:36:05.492803Z"
    }
   },
   "id": "c0b8e31711f651f5",
   "execution_count": 42
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "user_id: 0.12096531395348346\n",
      "merchant_id: 0.11989063044613282\n",
      "age_range: 0.0455565307186497\n",
      "gender: 0.021791920270467877\n",
      "total_logs: 0.13158934352260745\n",
      "unique_item_ids: 0.17787097606796531\n",
      "categories: 0.05300185383310342\n",
      "browse_days: 0.08466912322987172\n",
      "one_clicks: 0.11949556764108554\n",
      "shopping_carts: 0.009707612820791476\n",
      "purchase_times: 0.07142014309459892\n",
      "favourite_times: 0.04404098440124221\n"
     ]
    }
   ],
   "source": [
    "# 打印特征重要性\n",
    "for name, importance in zip(X_train, model.feature_importances_):\n",
    "    print(f\"{name}: {importance}\")"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-24T03:03:33.258300500Z",
     "start_time": "2024-09-24T03:03:33.171243100Z"
    }
   },
   "id": "2413f997a26ec5ba",
   "execution_count": 70
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "验证集auc:  0.666214143097564\n",
      "训练集auc:  0.6980103445318198\n"
     ]
    }
   ],
   "source": [
    "from xgboost import XGBClassifier\n",
    "bst = XGBClassifier(n_estimators=10, max_depth=4, learning_rate=1, objective='binary:logistic')\n",
    "# fit model\n",
    "bst.fit(X_train, y_train)\n",
    "y_pred1 =bst.predict(X_val)\n",
    "y_proba1 = bst.predict_proba(X_val)[:,1]\n",
    "auc1 = roc_auc_score(y_val,y_proba1)\n",
    "print('验证集auc: ',auc1)\n",
    "y_preds1=bst.predict_proba(X_train)[:,1]\n",
    "auc = roc_auc_score(y_train,y_preds1)\n",
    "print('训练集auc: ',auc)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-24T13:00:32.461957700Z",
     "start_time": "2024-09-24T13:00:32.020165100Z"
    }
   },
   "id": "496c86699834ea9d",
   "execution_count": 77
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[LightGBM] [Info] Number of positive: 5926, number of negative: 62752\n",
      "[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.016530 seconds.\n",
      "You can set `force_col_wise=true` to remove the overhead.\n",
      "[LightGBM] [Info] Total Bins 1399\n",
      "[LightGBM] [Info] Number of data points in the train set: 68678, number of used features: 12\n",
      "[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.086287 -> initscore=-2.359841\n",
      "[LightGBM] [Info] Start training from score -2.359841\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "roc_auc： 0.7008724219330529\n"
     ]
    }
   ],
   "source": [
    "from lightgbm import LGBMClassifier\n",
    "gbm = LGBMClassifier(   \n",
    "    boosting_type=\"dart\",\n",
    "    learning_rate=0.05,\n",
    "    max_depth = 10,\n",
    "    min_split_gain = 0.05,\n",
    "    n_estimators = 1000,\n",
    "    num_leaves = 30,\n",
    "    subsample = 0.5)\n",
    "gbm.fit(X_train, y_train)\n",
    "y_pred2 = gbm.predict_proba(X_val)\n",
    "auc_lgbm = roc_auc_score(y_val,gbm.predict_proba(X_val)[:,1])\n",
    "auc_lgbm1 = roc_auc_score(y_train,gbm.predict_proba(X_train)[:,1])\n",
    "print('roc_auc：',auc_lgbm)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-24T12:16:56.018456300Z",
     "start_time": "2024-09-24T12:15:04.020130400Z"
    }
   },
   "id": "fdd30989be3efaaf",
   "execution_count": 58
  },
  {
   "cell_type": "code",
   "outputs": [],
   "source": [
    "df_test = pd.merge(test,user_info,on=\"user_id\",how=\"left\")\n",
    "df_test = pd.merge(df_test,total_logs,on=[\"user_id\",\"merchant_id\"],how=\"left\")\n",
    "df_test = pd.merge(df_test,unique_item_ids,on=[\"user_id\",\"merchant_id\"],how=\"left\")\n",
    "df_test = pd.merge(df_test,categories,on=[\"user_id\",\"merchant_id\"],how=\"left\")\n",
    "df_test = pd.merge(df_test,browse_days_temp1,on=[\"user_id\",\"merchant_id\"],how=\"left\")\n",
    "df_test = pd.merge(df_test,four_features,on=[\"user_id\",\"merchant_id\"],how=\"left\")"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-24T12:22:06.965967400Z",
     "start_time": "2024-09-24T12:21:11.692765600Z"
    }
   },
   "id": "2276d9ec5f2c3d2d",
   "execution_count": 67
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "data": {
      "text/plain": "   user_id  merchant_id  prob  age_range  gender  total_logs  unique_item_ids  \\\n0   163968         4605   NaN        0.0     0.0           2                1   \n1   360576         1581   NaN        2.0     2.0          10                9   \n2   360576         1581   NaN        2.0     2.0          10                9   \n3   360576         1581   NaN        2.0     2.0          10                9   \n4   360576         1581   NaN        2.0     2.0          10                9   \n\n   categories  browse_days  one_clicks  shopping_carts  purchase_times  \\\n0           2            1           1               0               1   \n1           1            1           5               0               5   \n2           1            1           5               0               5   \n3           7            1           5               0               5   \n4           1            1           5               0               5   \n\n   favourite_times  \n0                0  \n1                0  \n2                0  \n3                0  \n4                0  ",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>user_id</th>\n      <th>merchant_id</th>\n      <th>prob</th>\n      <th>age_range</th>\n      <th>gender</th>\n      <th>total_logs</th>\n      <th>unique_item_ids</th>\n      <th>categories</th>\n      <th>browse_days</th>\n      <th>one_clicks</th>\n      <th>shopping_carts</th>\n      <th>purchase_times</th>\n      <th>favourite_times</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>163968</td>\n      <td>4605</td>\n      <td>NaN</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>2</td>\n      <td>1</td>\n      <td>2</td>\n      <td>1</td>\n      <td>1</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>360576</td>\n      <td>1581</td>\n      <td>NaN</td>\n      <td>2.0</td>\n      <td>2.0</td>\n      <td>10</td>\n      <td>9</td>\n      <td>1</td>\n      <td>1</td>\n      <td>5</td>\n      <td>0</td>\n      <td>5</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>360576</td>\n      <td>1581</td>\n      <td>NaN</td>\n      <td>2.0</td>\n      <td>2.0</td>\n      <td>10</td>\n      <td>9</td>\n      <td>1</td>\n      <td>1</td>\n      <td>5</td>\n      <td>0</td>\n      <td>5</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>360576</td>\n      <td>1581</td>\n      <td>NaN</td>\n      <td>2.0</td>\n      <td>2.0</td>\n      <td>10</td>\n      <td>9</td>\n      <td>7</td>\n      <td>1</td>\n      <td>5</td>\n      <td>0</td>\n      <td>5</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>360576</td>\n      <td>1581</td>\n      
<td>NaN</td>\n      <td>2.0</td>\n      <td>2.0</td>\n      <td>10</td>\n      <td>9</td>\n      <td>1</td>\n      <td>1</td>\n      <td>5</td>\n      <td>0</td>\n      <td>5</td>\n      <td>0</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_test.head()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-24T02:55:43.979691300Z",
     "start_time": "2024-09-24T02:55:43.880623200Z"
    }
   },
   "id": "bf510c67d9152df9",
   "execution_count": 46
  },
  {
   "cell_type": "code",
   "outputs": [],
   "source": [
    "X = df_test.drop('prob',axis=1)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-24T12:23:10.422880900Z",
     "start_time": "2024-09-24T12:23:10.307805700Z"
    }
   },
   "id": "4cce457162143043",
   "execution_count": 68
  },
  {
   "cell_type": "code",
   "outputs": [],
   "source": [
    "y_prob = bst.predict_proba(X)\n",
    "df_test['prob']=pd.Series(y_prob[:,1])\n",
    "df_test[['user_id','merchant_id','prob']].to_csv('prediction.csv',index=False)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-24T13:00:51.432655400Z",
     "start_time": "2024-09-24T13:00:48.863450100Z"
    }
   },
   "id": "e3b60a19651eff5e",
   "execution_count": 78
  },
  {
   "cell_type": "code",
   "outputs": [],
   "source": [],
   "metadata": {
    "collapsed": false
   },
   "id": "bb81b88a03ff07bd"
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
